<!DOCTYPE article
PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD with MathML3 v1.3 20210610//EN" "JATS-archivearticle1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.3" xml:lang="en" article-type="research-article"><?properties manuscript?><processing-meta base-tagset="archiving" mathml-version="3.0" table-model="xhtml" tagset-family="jats"><restricted-by>pmc</restricted-by></processing-meta><front><journal-meta><journal-id journal-id-type="nlm-journal-id">101778136</journal-id><journal-id journal-id-type="pubmed-jr-id">50558</journal-id><journal-id journal-id-type="nlm-ta">ACS ES T Water</journal-id><journal-id journal-id-type="iso-abbrev">ACS ES T Water</journal-id><journal-title-group><journal-title>ACS ES&#x00026;T water</journal-title></journal-title-group><issn pub-type="epub">2690-0637</issn></journal-meta><article-meta><article-id pub-id-type="pmid">39734778</article-id><article-id pub-id-type="pmc">11672865</article-id><article-id pub-id-type="doi">10.1021/acsestwater.4c00839</article-id><article-id pub-id-type="manuscript">HHSPA2041601</article-id><article-categories><subj-group subj-group-type="heading"><subject>Article</subject></subj-group></article-categories><title-group><article-title>Performance of Conditional Random Forest and Regression Models at Predicting Human Fecal Contamination of Produce Irrigation Ponds in the Southeastern United States</article-title></title-group><contrib-group><contrib contrib-type="author"><name><surname>Hofstetter</surname><given-names>Jessica</given-names></name><xref rid="FN1" ref-type="author-notes">&#x02225;</xref><aff id="A1">Waterborne Disease Prevention Branch, Centers for Disease Control and Prevention, Atlanta, Georgia 30333, United States; Chenega Enterprise Systems &#x00026; Solutions, LLC, Chesapeake, Virginia 23320, United States; Department of Horticulture, Auburn University, Auburn, Alabama 36849, United States</aff></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid" 
authenticated="false">http://orcid.org/0000-0003-4055-7164</contrib-id><name><surname>Holcomb</surname><given-names>David A.</given-names></name><xref rid="FN1" ref-type="author-notes">&#x02225;</xref><aff id="A2">Waterborne Disease Prevention Branch, Centers for Disease Control and Prevention, Atlanta, Georgia 30333, United States</aff></contrib><contrib contrib-type="author"><name><surname>Kahler</surname><given-names>Amy M.</given-names></name><aff id="A3">Waterborne Disease Prevention Branch, Centers for Disease Control and Prevention, Atlanta, Georgia 30333, United States</aff></contrib><contrib contrib-type="author"><name><surname>Rodrigues</surname><given-names>Camila</given-names></name><aff id="A4">Department of Horticulture, Auburn University, Auburn, Alabama 36849, United States</aff></contrib><contrib contrib-type="author"><name><surname>da Silva</surname><given-names>Andre Luiz Biscaia Ribeiro</given-names></name><aff id="A5">Department of Horticulture, Auburn University, Auburn, Alabama 36849, United States</aff></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">http://orcid.org/0000-0002-7318-5240</contrib-id><name><surname>Mattioli</surname><given-names>Mia C.</given-names></name><aff id="A6">Waterborne Disease Prevention Branch, Centers for Disease Control and Prevention, Atlanta, Georgia 30333, United States</aff></contrib></contrib-group><author-notes><fn fn-type="equal" id="FN1"><label>&#x02225;</label><p id="P1">Jessica Hofstetter and David A. Holcomb contributed equally and should be designated co-first authors.</p></fn><fn fn-type="con" id="FN2"><p id="P2">Author Contributions</p><p id="P3">CRediT: <bold>Jessica Hofstetter</bold> data curation, formal analysis, investigation, methodology, project administration, software, supervision, writing - original draft, writing - review &#x00026; editing; <bold>David A. 
Holcomb</bold> formal analysis, methodology, software, validation, visualization, writing - original draft, writing - review &#x00026; editing; <bold>Amy M. Kahler</bold> conceptualization, data curation, investigation, methodology, project administration, writing - review &#x00026; editing; <bold>Camila Rodrigues</bold> project administration, supervision, writing - review &#x00026; editing; <bold>Andre Luiz Biscaia Ribeiro da Silva</bold> conceptualization, funding acquisition, methodology, project administration, supervision; <bold>Mia C. Mattioli</bold> conceptualization, funding acquisition, investigation, methodology, project administration, resources, supervision, writing - review &#x00026; editing.</p></fn><corresp id="CR1"><bold>Corresponding Author: Mia C. Mattioli</bold> &#x02013; Waterborne Disease Prevention Branch, Centers for Disease Control and Prevention, Atlanta, Georgia 30333, United States; <email>mmattioli@cdc.gov</email></corresp></author-notes><pub-date pub-type="nihms-submitted"><day>12</day><month>12</month><year>2024</year></pub-date><pub-date pub-type="ppub"><day>27</day><month>11</month><year>2024</year></pub-date><pub-date pub-type="pmc-release"><day>27</day><month>12</month><year>2024</year></pub-date><volume>4</volume><issue>12</issue><fpage>5844</fpage><lpage>5855</lpage><abstract id="ABS1"><p id="P4">Irrigating fresh produce with contaminated water contributes to the burden of foodborne illness. Identifying fecal contamination of irrigation waters and characterizing fecal sources and associated environmental factors can help inform fresh produce safety and health hazard management. Using two previously collected data sets, we developed and evaluated the performance of logistic regression and conditional random forest models for predicting general and human-specific fecal contamination of ponds in southwest Georgia used for fresh produce irrigation. 
Generic <italic toggle="yes">Escherichia coli</italic> served as a general fecal indicator, and human-associated <italic toggle="yes">Bacteroides</italic> (HF183), crAssphage, and F+ coliphage genogroup II were used as indicators of human fecal contamination. Increased rainfall in the previous 7 days and the presence of a building within 152 m (a proxy for proximity to septic systems) were associated with increased odds of human fecal contamination in the training data set. However, the models did not accurately predict the presence of human-associated fecal indicators in a second data set collected from nearby irrigation ponds in different years. Predictive statistical models should be used with caution to assess produce irrigation water quality as models may not reliably predict fecal contamination at other locations and times, even within the same growing region.</p></abstract><abstract id="ABS2" abstract-type="graphical"><title>Graphical Abstract</title><p id="P5">
<graphic xlink:href="nihms-2041601-f0001.jpg" position="anchor"/>
</p></abstract><kwd-group><kwd>microbial source tracking</kwd><kwd>quantitative polymerase chain reaction (qPCR)</kwd><kwd>dead-end ultrafiltration (DEUF)</kwd><kwd>predictive modeling</kwd><kwd>conditional random forest</kwd><kwd>agricultural water</kwd><kwd>fresh produce safety</kwd><kwd>foodborne illness</kwd></kwd-group></article-meta></front><body><sec id="S1"><title>INTRODUCTION</title><p id="P6">The United States Interagency Food Safety Analytics Collaboration (IFSAC) estimates that among the 1,322 foodborne outbreaks between 1998 and 2021, produce was the vehicle for 43% of foodborne illnesses from <italic toggle="yes">Salmonella</italic>, 52% of <italic toggle="yes">Listeria monocytogenes</italic> illnesses, and 67% of <italic toggle="yes">Escherichia coli</italic> O157 illnesses.<sup><xref rid="R1" ref-type="bibr">1</xref>,<xref rid="R2" ref-type="bibr">2</xref></sup> Preharvest application of poor microbial-quality water is one way that fruits and vegetables can become contaminated with foodborne pathogens.<sup><xref rid="R3" ref-type="bibr">3</xref></sup> Surface water is more likely than groundwater to be exposed to fecal contamination from humans and animals and may pose a greater risk to human health when used for irrigation.<sup><xref rid="R3" ref-type="bibr">3</xref></sup> As such, an important component of fresh produce safety hazard management is the ability to identify times when irrigation water may be contaminated and the sources and factors contributing to contamination. One of the most widely used methods for evaluating microbial water quality is measuring generic <italic toggle="yes">E. coli</italic> as a general indicator of fecal contamination.<sup><xref rid="R4" ref-type="bibr">4</xref></sup> However, the utility of this fecal indicator in untreated irrigation water for fresh produce production is debated. The US Environmental Protection Agency (EPA) has recommended threshold values for generic <italic toggle="yes">E. 
coli</italic> levels, such as a geometric mean concentration &#x02265;126 <italic toggle="yes">E. coli</italic> per 100 mL, to identify impaired microbial water quality in surface water used for recreation.<sup><xref rid="R5" ref-type="bibr">5</xref></sup>
<italic toggle="yes">E. coli</italic> concentrations exceeding these thresholds are associated with higher rates of illness among swimmers. It has also been suggested that these thresholds be applied to irrigation waters,<sup><xref rid="R6" ref-type="bibr">6</xref></sup> but generic <italic toggle="yes">E. coli</italic> levels are not consistently associated with pathogen presence in irrigation water,<sup><xref rid="R7" ref-type="bibr">7</xref></sup> and several pathogens that cause significant human foodborne illness, such as <italic toggle="yes">Salmonella</italic> and pathogenic <italic toggle="yes">E. coli</italic>, have been detected in irrigation water sources even when generic <italic toggle="yes">E. coli</italic> was not detected or levels were below the EPA recreational water quality thresholds.<sup><xref rid="R8" ref-type="bibr">8</xref>,<xref rid="R9" ref-type="bibr">9</xref></sup></p><p id="P7">Generic <italic toggle="yes">E. coli</italic> can arise from many animals and other aquatic sources, which limits its use for characterizing fecal sources.<sup><xref rid="R10" ref-type="bibr">10</xref></sup> Many foodborne illnesses associated with produce, such as norovirus GI, GII, and GIV; hepatitis A types I, II, and III; hepatitis E types 1&#x02013;4 and 7; and the parasite <italic toggle="yes">Cyclospora cayetanensis</italic>, are solely associated with human contamination.<sup><xref rid="R11" ref-type="bibr">11</xref>&#x02013;<xref rid="R14" ref-type="bibr">14</xref></sup> These pathogens have been found in water impacted by human fecal contamination and subsequently in produce grown using these water sources.<sup><xref rid="R15" ref-type="bibr">15</xref>,<xref rid="R16" ref-type="bibr">16</xref></sup> This highlights the importance of characterizing human-specific fecal contamination in irrigation water for remediation and the mitigation of health risks.</p><p id="P8">Testing produce irrigation water for microbial source tracking (MST) markers is a 
strategy for determining fecal contamination sources.<sup><xref rid="R17" ref-type="bibr">17</xref></sup> Previous studies have highlighted the importance of considering multiple MST markers to account for differences in marker decay rates and abundance in the host feces, particularly when low levels of contamination are suspected.<sup><xref rid="R18" ref-type="bibr">18</xref>&#x02013;<xref rid="R20" ref-type="bibr">20</xref></sup> Molecular assays that target gene sequences from <italic toggle="yes">Bacteroides</italic> in human feces (e.g., HF183) have been developed and widely implemented as MST markers to infer the presence of human fecal contamination in environmental samples.<sup><xref rid="R21" ref-type="bibr">21</xref>&#x02013;<xref rid="R25" ref-type="bibr">25</xref></sup> CrAssphage, a recently identified virus of <italic toggle="yes">Bacteroides</italic> that is abundant in human feces, has also been used as a sensitive and human-specific MST marker.<sup><xref rid="R26" ref-type="bibr">26</xref>,<xref rid="R27" ref-type="bibr">27</xref></sup> As a virus, crAssphage is more biologically similar to human-specific enteric viral pathogens than bacterial fecal indicators and has been associated with enteric viruses in environmental waters.<sup><xref rid="R28" ref-type="bibr">28</xref></sup> The associations between <italic toggle="yes">E. coli</italic> levels and the presence of HF183 or crAssphage in surface waters reported previously have been inconsistent and limited, and the environmental factors associated with generic <italic toggle="yes">E. coli</italic> levels differ from those associated with HF183 occurrence.<sup><xref rid="R28" ref-type="bibr">28</xref>&#x02013;<xref rid="R32" ref-type="bibr">32</xref></sup> Male-specific (F+) coliphages, which infect coliform bacteria like <italic toggle="yes">E. 
coli</italic>, have also been widely used as fecal indicator viruses and can be detected in environmental samples by conventional culture methods.<sup><xref rid="R33" ref-type="bibr">33</xref></sup> Although F+ coliphages in general are not host-specific, F+ RNA (FRNA) coliphage genogroup II (GII) has been associated primarily with human feces and used as a human MST marker.<sup><xref rid="R34" ref-type="bibr">34</xref>&#x02013;<xref rid="R36" ref-type="bibr">36</xref></sup></p><p id="P9">Previous water quality modeling studies have found associations between microbial contamination measured by <italic toggle="yes">E. coli</italic> and HF183 and various environmental factors, including pH, conductivity, turbidity, season, temperature, precipitation, and land use.<sup><xref rid="R8" ref-type="bibr">8</xref>,<xref rid="R37" ref-type="bibr">37</xref>&#x02013;<xref rid="R42" ref-type="bibr">42</xref></sup> However, the specific factors associated with fecal contamination varied between studies, water source type, and location. Regression modeling has previously been used to determine significant factors for predicting human fecal contamination in ambient recreational water,<sup><xref rid="R40" ref-type="bibr">40</xref></sup> while recent advances in machine learning modeling approaches have been applied to predicting pathogens in certain agricultural settings.<sup><xref rid="R43" ref-type="bibr">43</xref></sup> The environmental factors identified as driving contamination often vary based on modeling approach. For example, regression and machine learning models previously identified different explanatory variables as important predictors of <italic toggle="yes">Salmonella</italic> and enterohemorrhagic <italic toggle="yes">E. 
coli</italic> markers in irrigation water.<sup><xref rid="R44" ref-type="bibr">44</xref></sup></p><p id="P10">Tools to identify fecal contamination and characterize fecal sources and associated environmental factors in irrigation water could help growers manage hazards for fresh produce safety. The discrepancies in the apparent drivers of contamination identified in the literature suggest that conducting agricultural setting-specific water quality assessments that consider multiple microbial targets, environmental factors, and modeling approaches may be necessary to adequately characterize microbial hazards in irrigation water. Previous models of human fecal contamination in US irrigation waters have focused on predicting the presence of bacterial MST markers (e.g., HF183) in streams;<sup><xref rid="R45" ref-type="bibr">45</xref></sup> to our knowledge, comparable models for viral markers like crAssphage and for nonflowing water sources like irrigation ponds have not previously been reported. In this study, we developed models to evaluate environmental factors associated with the detection of four fecal markers in irrigation ponds in the southeast United States: generic <italic toggle="yes">E. coli</italic>, HF183, crAssphage, and FRNA GII coliphage. We then tested the predictive performance of the models on a separate data set that had been collected previously from the same growing region.</p></sec><sec id="S2"><title>MATERIALS AND METHODS</title><sec id="S3"><title>Study Area.</title><p id="P11">The data used in this study were collected from irrigation ponds on farms in southwest Georgia. 
Sites were located in a region with subtropical environmental conditions characterized by coarse-textured and well-drained soils used for agriculture, pasture, and mixed forests.<sup><xref rid="R46" ref-type="bibr">46</xref></sup> The ponds used for produce irrigation were located in the Little River watershed in the headwaters of the Suwannee River basin.<sup><xref rid="R47" ref-type="bibr">47</xref></sup> The irrigation ponds in the test data set were located within 0.5 to 10 miles (0.8&#x02013;16 km) of the ponds sampled for the training data set. All ponds in the training data set and two of the three ponds in the test data set were reported to be surface water-fed, while one pond in the test data set was groundwater-fed.</p></sec><sec id="S4"><title>Training Data Set.</title><p id="P12">The training data set was collected as part of a study monitoring the occurrence of <italic toggle="yes">C. cayetanensis</italic> in produce irrigation water.<sup><xref rid="R48" ref-type="bibr">48</xref></sup> Large-volume pond water samples (50 L) were collected by dead-end ultrafiltration (DEUF) from eight ponds serving two growers (A and B) one or two times per month from September 2020 through December 2021. Generic <italic toggle="yes">E. coli</italic> was enumerated from 100 mL grab samples collected alongside the DEUF samples within six hours of collection using the IDEXX Colilert-18 Quanti-Tray 2000 method (IDEXX Laboratories, Westbrook, ME). DEUF samples were shipped on ice to the US Centers for Disease Control and Prevention (CDC) to be backflushed and further concentrated by centrifugation (4000<italic toggle="yes">g</italic> for 15 min) within 48 h of collection, as previously described.<sup><xref rid="R48" ref-type="bibr">48</xref>,<xref rid="R49" ref-type="bibr">49</xref></sup></p><p id="P13">DNA was extracted from 200 <italic toggle="yes">&#x003bc;</italic>L of DEUF concentrates using the Qiagen AllPrep PowerViral DNA/RNA Kit (Qiagen, Hilden, Germany). 
Isolated DNA was immediately subjected to molecular analysis. Detailed sample processing and molecular analysis methods have been described previously.<sup><xref rid="R48" ref-type="bibr">48</xref></sup> Briefly, human-associated genetic markers were amplified by quantitative polymerase chain reaction (qPCR) in triplicate following EPA Method 1696 to detect HF183 and the CPQ_056 assay to detect crAssphage.<sup><xref rid="R26" ref-type="bibr">26</xref>,<xref rid="R48" ref-type="bibr">48</xref>,<xref rid="R50" ref-type="bibr">50</xref></sup> HF183 and crAssphage were considered to have been detected in a sample when two of the three qPCR replicates for a given assay demonstrated amplification above a threshold of 0.03 &#x00394;Rn and a quantification cycle (<italic toggle="yes">C</italic><sub>q</sub>) value below 40.<sup><xref rid="R48" ref-type="bibr">48</xref></sup></p></sec><sec id="S5"><title>Test Data Set.</title><p id="P14">The test data set consisted of samples collected from three additional ponds between May 2015 and May 2016 as part of a study to evaluate large-volume sample collection for characterizing foodborne pathogens and indicators in irrigation water.<sup><xref rid="R51" ref-type="bibr">51</xref></sup> During each sampling visit, 1 L grab samples and 50 L DEUF samples were collected at two pond-edge locations per pond. Generic <italic toggle="yes">E. coli</italic> concentrations were measured separately in each grab sample by the same Colilert-18 method and averaged for further analysis. The two DEUF samples collected per pond were separately backflushed using the same procedure as for the training set ultrafilters. 
Prior to secondary concentration, the backflush was analyzed for male-specific (F+) coliphage viruses using the EPA Single Agar Layer (SAL) method,<sup><xref rid="R52" ref-type="bibr">52</xref></sup> followed by F+ RNA coliphage (FRNA) genotyping as described elsewhere.<sup><xref rid="R51" ref-type="bibr">51</xref>,<xref rid="R53" ref-type="bibr">53</xref></sup> Half the remaining backflush volume was further concentrated by poly(ethylene glycol) (PEG) precipitation and centrifugation at 10,000<italic toggle="yes">g</italic> for 30 min,<sup><xref rid="R54" ref-type="bibr">54</xref></sup> and the other half was concentrated by centrifugation alone (4000<italic toggle="yes">g</italic> for 30 min), yielding four ultrafilter concentrates per pond per sampling visit. Nucleic acid was extracted from 750 <italic toggle="yes">&#x003bc;</italic>L of each concentrate by the Universal Nucleic Acid Extraction (UNEX) method.<sup><xref rid="R55" ref-type="bibr">55</xref></sup> Each concentrate was analyzed for HF183 by qPCR as described previously.<sup><xref rid="R22" ref-type="bibr">22</xref>,<xref rid="R51" ref-type="bibr">51</xref></sup> HF183 was determined to have been detected in a sample if two qPCR replicates from either DEUF sample showed amplification before the <italic toggle="yes">C</italic><sub>q</sub> value of 40.</p><p id="P15">Though produced using somewhat different workflows, the training and test data sets both ultimately consisted of single observations of the generic <italic toggle="yes">E. coli</italic> concentration, the presence of human-associated bacteria (HF183), and the presence of a human-associated virus in each pond for each sampling event. For both data sets, generic <italic toggle="yes">E. coli</italic> was measured in grab samples by culture, and HF183 was detected by qPCR in large-volume water samples processed by DEUF with secondary concentration by centrifugation. 
The human-associated virus assessed for the training data set, crAssphage, was detected by qPCR in centrifuge-concentrated DEUF sample backflush. For the test data set, FRNA GII coliphage was assessed as the human-associated virus in DEUF sample backflush (prior to any secondary concentration) using culture methods coupled with qPCR-based genotyping.</p></sec><sec id="S6"><title>Environmental Explanatory Variables.</title><p id="P16">Water quality parameters, including dissolved oxygen (mg/L), turbidity (NTU), pH, conductivity (<italic toggle="yes">&#x003bc;</italic>S/cm), and temperature (&#x000b0;C), were measured using a ProDSS Multiparameter Digital Water Quality Meter (YSI, Yellow Springs, OH) at the time of sample collection for both the training and test data sets. Negative turbidity measurements were censored at 0 NTU for analysis. Each parameter was measured four times over approximately 30 min during the training set collection, and the measurements were averaged, as described previously.<sup><xref rid="R48" ref-type="bibr">48</xref></sup></p><p id="P17">For the training data set, daily rainfall accumulation (inches) was collected using Rain101A Rainfall Data Loggers (MadgeTech, Inc., Warner, NH) or WatchDog 1120 Data Logging Rain Gauges (Spectrum Technologies, Inc., Aurora, IL) placed at each pond. Rainfall data were intermittently unavailable at individual ponds due to equipment failures. However, rainfall data were successfully collected from at least one of the four Grower A pond gauges during the entire study period. Since all Grower A ponds were located within a three-mile radius, data from all working gauges were averaged each day to create a complete data set for the study period. Rain gauge malfunctions occurred at two of the four Grower B ponds, but as these ponds were located less than 1 mile apart, their rain data were merged or averaged if data from both were available. 
Test data set rainfall measurements were retrieved from a University of Georgia-managed publicly accessible weather logging system stationed within 10 miles (16 km) of all study ponds.<sup><xref rid="R56" ref-type="bibr">56</xref></sup></p><p id="P18">Daily average wind speed (miles per hour [mph]) and daily solar radiation (MJ/m<sup>2</sup>) were obtained for both data sets from the US Department of Agriculture (USDA) Soil Climate Analysis Network monitoring station located within 15 miles (24 km) of all of the ponds.<sup><xref rid="R57" ref-type="bibr">57</xref></sup> Rainfall, wind, and solar radiation data were each aggregated into two variables: accumulation within the previous 2 days (Rain 0&#x02013;2, Wind 0&#x02013;2, and Solar 0&#x02013;2) and in the previous 2-to-7 days (Rain 2&#x02013;7, Wind 2&#x02013;7, and Solar 2&#x02013;7). These categories were constructed to represent more recent events and events occurring further in the past, respectively, a distinction shown to be meaningful in previous predictive models of fecal contamination in surface water.<sup><xref rid="R58" ref-type="bibr">58</xref>,<xref rid="R59" ref-type="bibr">59</xref></sup> The International Organization for Standardization (ISO) week of sample collection was also included as a continuous explanatory variable to account for recurring temporal patterns.</p><p id="P19">Proximity of the ponds to septic systems was considered a potential source of human fecal contamination in this region. Because public records of septic installations were incomplete, we used the proximity to a building as a proxy for the proximity to potential septic pollution sources. Due to the rural setting, it was likely that any buildings were served by septic systems. 
The Georgia Department of Public Health requires an absorption field area of 500 ft<sup>2</sup> for a two-bedroom house with a residential trench septic system and a 2190 ft<sup>2</sup> absorption field area for commercial buildings.<sup><xref rid="R60" ref-type="bibr">60</xref></sup> Therefore, a pond was classified as &#x0201c;close&#x0201d; to a building if it was located within 2000 ft (610 m) of any commercial building or within 500 ft (152 m) of all other building types, regardless of septic record. We ascertained building proximity from satellite imagery in Google Maps (<ext-link xlink:href="https://maps.google.com" ext-link-type="uri">maps.google.com</ext-link>). A detailed examination of building proximity and septic records near the ponds is provided in the <xref rid="SD1" ref-type="supplementary-material">Supporting Information (SI)</xref>.</p></sec><sec id="S7"><title>Descriptive Statistical Analysis.</title><p id="P20">Conductivity and the two cumulative solar radiation variables (Solar 0&#x02013;2 and Solar 2&#x02013;7) were log<sub>10</sub>-transformed prior to statistical analyses. Turbidity was also log<sub>10</sub>-transformed after adding one to each measurement to address zero values. Samples with <italic toggle="yes">E. coli</italic> most probable number (MPN) concentrations &#x02265;126 MPN/100 mL were classified as having elevated generic <italic toggle="yes">E. coli</italic> levels based on the EPA recreational water guidance and previous irrigation water models that predicted pathogenic <italic toggle="yes">E. coli</italic> gene occurrence using a 126 MPN/100 mL generic <italic toggle="yes">E. coli</italic> threshold.<sup><xref rid="R43" ref-type="bibr">43</xref></sup> Associations between the frequency of elevated generic <italic toggle="yes">E. 
coli</italic> &#x02265;126 MPN/100 mL and frequency of HF183 detection and between HF183 and crAssphage detection frequencies were assessed using a Cochran&#x02013;Mantel&#x02013;Haenszel (CMH) test stratified by pond. Pearson correlation analysis was conducted to assess pairwise correlations between all environmental explanatory variables. All analyses were conducted using R version 4.4.0.<sup><xref rid="R61" ref-type="bibr">61</xref></sup> Analysis code and study data are available at <ext-link xlink:href="https://cdcgov.github.io/WDPB_EMEL/manuscripts/irrigation_models/" ext-link-type="uri">https://cdcgov.github.io/WDPB_EMEL/manuscripts/irrigation_models/</ext-link>.</p></sec><sec id="S8"><title>Model Development.</title><p id="P21">Logistic regression and conditional random forest (CRF) models were developed to predict the detection of HF183, detection of any human fecal indicator (HFI), and elevated generic <italic toggle="yes">E. coli</italic> &#x02265;126 MPN/100 mL. HFI detection was defined as detecting either or both HF183 and a human-associated virus. For the training data set, the HFI variable used crAssphage detection as the second indicator of human fecal contamination. Detection of FRNA GII coliphage was used as the human-associated viral indicator to define the HFI variable in the test data set.<sup><xref rid="R34" ref-type="bibr">34</xref>&#x02013;<xref rid="R36" ref-type="bibr">36</xref></sup></p><p id="P22">The same set of explanatory variables was considered for both the logistic regression and CRF models; the final variable sets were selected separately for each modeling approach and outcome (HF183, HFI, and generic <italic toggle="yes">E. coli</italic> &#x02265; 126 MPN/100 mL). 
The full explanatory variable set evaluated included: ISO week of sample collection; water sample temperature, dissolved oxygen, pH, log<sub>10</sub> conductivity, and log<sub>10</sub> turbidity; cumulative rain, wind, and log<sub>10</sub> solar radiation in the previous 0&#x02013;2 days (Rain 0&#x02013;2, Wind 0&#x02013;2, and Solar 0&#x02013;2) and previous 2&#x02013;7 days (Rain 2&#x02013;7, Wind 2&#x02013;7, and Solar 2&#x02013;7); and a binary variable indicating building proximity. To limit collinearity, explanatory variables with pairwise correlation absolute value &#x02265;0.5 were not included in the same model.</p><p id="P23">Model training and tuning, including variable selection, were conducted using the training data set. The final trained models were then applied to the test data set to evaluate out-of-sample predictive performance. Models for the detection of HF183 and HFI were also retrained including a binary variable indicating <italic toggle="yes">E. coli</italic> &#x02265; 126 MPN/100 mL as an additional explanatory variable to evaluate whether elevated generic <italic toggle="yes">E. coli</italic> levels were predictive of human fecal contamination.</p><sec id="S9"><title>Training Regression Models.</title><p id="P24">Mixed-effects logistic regression models were implemented with the <italic toggle="yes">lme4</italic> package in R and included the pond of sample collection as a random effect to account for repeated measures.<sup><xref rid="R62" ref-type="bibr">62</xref></sup> Variable selection for the logistic regression models proceeded in two stages. First, univariable associations were evaluated in separate models for each binary outcome variable (detection of HF183, HFI detection, and generic <italic toggle="yes">E. coli</italic> &#x02265; 126 MPN/100 mL) and each explanatory variable. 
Explanatory variables with <italic toggle="yes">p</italic>-value &#x0003c;0.1 in the univariable models were considered for inclusion in multivariable models. Second, backward stepwise selection was performed to select the explanatory variables to retain in the multivariable mixed-effects logistic regression models. After specifying the full model with all variables retained from the univariable models, the explanatory variable with the highest <italic toggle="yes">p</italic>-value was removed and the full and reduced models were compared using a chi-squared test with one degree of freedom.<sup><xref rid="R62" ref-type="bibr">62</xref></sup> A nonsignificant chi-squared test at the 10% significance level indicated that the full model did not meaningfully reduce the deviance and was used as the decision criterion in favor of the simpler model. The procedure was repeated until a significant chi-squared test was obtained. Due to convergence issues for models predicting generic <italic toggle="yes">E. 
coli</italic> &#x02265; 126 MPN/100 mL, a forward stepwise selection procedure was used instead, beginning with an intercept-only model (including the pond random effect) and adding variables until a nonsignificant chi-squared test was obtained.</p></sec><sec id="S10"><title>Training Conditional Random Forest Models.</title><p id="P25">CRF models build on the advantages of random forest analysis, including the ability to explore complex and nonlinear interactions between numerous explanatory variables without needing to prespecify the model structure, by incorporating conditional inference approaches to mitigate the overfitting and bias toward correlated variables exhibited by conventional random forest.<sup><xref rid="R63" ref-type="bibr">63</xref>&#x02013;<xref rid="R65" ref-type="bibr">65</xref></sup> CRF models from the <italic toggle="yes">party</italic> package were developed using the <italic toggle="yes">mlr</italic> package framework in R.<sup><xref rid="R63" ref-type="bibr">63</xref>,<xref rid="R66" ref-type="bibr">66</xref></sup> Models were trained using 10,001 conditional inference trees and the default hyperparameter values suggested for unbiased variable selection.<sup><xref rid="R64" ref-type="bibr">64</xref></sup> As a sensitivity analysis, we also constructed CRF models with hyperparameter values for the number of explanatory variables randomly considered for splitting each node (&#x0201c;mtry&#x0201d;) and the minimum number of observations to construct a terminal node (&#x0201c;minbucket&#x0201d;) tuned by maximizing the mean area under the receiver operating characteristic curve (AUC) using repeated 3-fold cross-validation (five iterations).<sup><xref rid="R44" ref-type="bibr">44</xref>,<xref rid="R65" ref-type="bibr">65</xref></sup> Synthetic minority oversampling technique (SMOTE) was implemented during hyperparameter tuning for an additional set of models to address class imbalance of the three binary outcome variables as an additional 
sensitivity analysis.<sup><xref rid="R67" ref-type="bibr">67</xref>,<xref rid="R68" ref-type="bibr">68</xref></sup> While resampling-based imbalance corrections have been reported to improve the predictive accuracy of previous CRF models of foodborne pathogen presence in water,<sup><xref rid="R69" ref-type="bibr">69</xref></sup> the practice has been criticized for producing poorly calibrated probabilistic predictions with inconsistent impacts on classification performance.<sup><xref rid="R70" ref-type="bibr">70</xref>&#x02013;<xref rid="R72" ref-type="bibr">72</xref></sup> We assessed variable importance as the independent impact of each variable on the AUC using a conditional permutation approach to address potential bias from correlated explanatory variables and outcome variable class imbalance.<sup><xref rid="R73" ref-type="bibr">73</xref>,<xref rid="R74" ref-type="bibr">74</xref></sup></p></sec></sec><sec id="S11"><title>Predictive Performance.</title><p id="P26">The trained logistic regression and CRF models were applied to the test data set to generate predicted probabilities for the detection of HF183, detection of HFI, and generic <italic toggle="yes">E. coli</italic> &#x02265; 126 MPN/100 mL in different ponds from the same growing region. 
Predictive performance was assessed by receiver operating characteristic (ROC) curve analysis using the <italic toggle="yes">pROC</italic> package in R.<sup><xref rid="R75" ref-type="bibr">75</xref></sup> For consistency with the CRF variable importance procedure, which utilized AUC as a less-biased alternative to the traditional accuracy metric for determining variable importance,<sup><xref rid="R74" ref-type="bibr">74</xref></sup> we estimated the area under the ROC curve as a dimensionless metric of the overall ability of each model to discriminate between the presence and absence of the outcome.<sup><xref rid="R76" ref-type="bibr">76</xref></sup> An AUC of 1 denotes perfect concordance between predicted and observed outcome values, indicating ideal model performance, and an AUC of 0.5 corresponds to model classification performance equivalent to random chance.<sup><xref rid="R77" ref-type="bibr">77</xref>,<xref rid="R78" ref-type="bibr">78</xref></sup> We also calculated predictive sensitivity (the proportion of test samples positive for the outcome correctly predicted to be positive by the model) and specificity (the proportion of test samples negative for the outcome correctly predicted to be negative) at model-specific classification thresholds identified by maximizing Youden&#x02019;s <italic toggle="yes">J</italic> statistic.<sup><xref rid="R79" ref-type="bibr">79</xref></sup> The classification threshold is the minimum predicted probability of the outcome required to classify a sample as positive; increasing the threshold generally increases specificity (i.e., reduces the false positive rate) at the expense of decreasing the sensitivity (the true positive rate). 
The threshold that maximizes <italic toggle="yes">J</italic> balances sensitivity and specificity by minimizing the overall proportion of misclassified samples, weighting false positives and false negatives equally.</p></sec></sec><sec id="S12"><title>RESULTS</title><sec id="S13"><title>Training Data Set.</title><p id="P27">Of the 217 training data set samples, HF183 was detected in 71 (33%) water samples, crAssphage was detected in 14 (7%) samples, and these two human-associated markers were codetected in 10 (5%) samples (<xref rid="T1" ref-type="table">Table 1</xref>). HF183 was detected in &#x02265;25% of the samples from ponds A1, A2, A3, A4, and B4, all of which were considered near buildings (&#x0003c;610 m from a commercial building or &#x0003c;152 m from any other building). Likewise, all ponds in which crAssphage was detected were near buildings. CrAssphage detections were significantly associated with HF183 detections (CMH <inline-formula><mml:math id="M2" display="inline"><mml:msubsup><mml:mi>&#x003c7;</mml:mi><mml:mrow><mml:mtext>df</mml:mtext><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>5.11</mml:mn></mml:math></inline-formula>, <italic toggle="yes">p</italic> = 0.02). Generic <italic toggle="yes">E. coli</italic> exceeded 126 MPN/100 mL at least once during the sampling period in every pond (4&#x02013;22% of samples per pond, <xref rid="T1" ref-type="table">Table 1</xref>). Elevated <italic toggle="yes">E. coli</italic> levels (&#x02265;126 MPN/100 mL) were not associated with HF183 detection (CMH <inline-formula><mml:math id="M1" display="inline"><mml:msubsup><mml:mi>&#x003c7;</mml:mi><mml:mrow><mml:mtext>df</mml:mtext><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>1.82</mml:mn></mml:math></inline-formula>, <italic toggle="yes">p</italic> = 0.18). 
Descriptive statistics of explanatory variables are summarized for each pond in <xref rid="SD1" ref-type="supplementary-material">Table S1</xref>, and pairwise Pearson correlation coefficients are presented in <xref rid="SD1" ref-type="supplementary-material">Figure S1</xref>.</p></sec><sec id="S14"><title>Test Data Set.</title><p id="P28">All ponds in the test data set were located within 500 ft (152 m) of a building or 2000 ft (610 m) of a commercial building. HF183 was detected in about a third of the samples from each pond (<xref rid="T1" ref-type="table">Table 1</xref>). Human-associated FRNA GII coliphage was detected less frequently than HF183 but at a similar frequency to crAssphage in the training data set. HF183 and FRNA GII coliphage were codetected in 5 (8%) samples. Similarly, generic <italic toggle="yes">E. coli</italic> levels were &#x02265;126 MPN/100 mL at comparable frequencies in both the training (10%) and test (9%) data sets.</p></sec><sec id="S15"><title>Models.</title><sec id="S16"><title>Human Fecal Indicators.</title><p id="P29">Results of univariable logistic regression models used to inform explanatory variable selection are presented in <xref rid="SD1" ref-type="supplementary-material">Figure S2</xref>. Following backward stepwise variable selection, building presence, cumulative rainfall in the previous 0&#x02013;2 days, and cumulative rainfall in the previous 2&#x02013;7 days were retained in the final multivariable logistic regression models for both HF183 detection and HFI detection. The presence of a building was associated with elevated odds of HF183 and HFI (HF183 odds ratio [OR]: 24.8, 95% confidence interval [CI]: 3.6&#x02013;172.5; HFI OR: 28.6, 95%CI: 4.4&#x02013;187.1; <xref rid="F1" ref-type="fig">Figure 1</xref>). Rainfall was also positively associated with HFI presence. 
The odds of detection approximately doubled for each additional inch (2.5 cm) of rain 0&#x02013;2 days before sample collection for both HF183 (OR: 2.0, 95%CI: 0.99&#x02013;4.2) and HFI (OR: 2.1, 95%CI: 1.0&#x02013;4.5). An additional inch of rain 2&#x02013;7 days before sample collection was associated with a 70% increase in the odds of detecting both HF183 (OR: 1.7, 95%CI: 1.2&#x02013;2.5) and HFI (OR: 1.7, 95%CI: 1.2&#x02013;2.5). Although the magnitude of the estimated associations was lower for rain 2&#x02013;7 days prior, these estimates were more precise than the larger associations estimated for rain in the previous 0&#x02013;2 days, whose confidence intervals also included the null. Similarly, the top two ranked explanatory variables by variable importance in the CRF models for HF183 and HFI were the presence of a building and rainfall in the previous 2&#x02013;7 days (<xref rid="F2" ref-type="fig">Figure 2</xref>). All other variables had negligible importance values. Generic <italic toggle="yes">E. coli</italic> &#x02265; 126 MPN/100 mL was not significant when included as an additional explanatory variable in logistic regression models (HF183 OR: 1.1, 95%CI: 0.26&#x02013;5.0; HFI OR: 0.77, 95%CI: 0.18&#x02013;3.4) and was of negligible variable importance in CRF models.</p></sec><sec id="S17"><title>Generic E. coli.</title><p id="P30">Rainfall and solar radiation in the previous 0&#x02013;2 days were the only variables retained in the multivariable logistic regression model for elevated generic <italic toggle="yes">E. coli</italic>. Rainfall in the previous 0&#x02013;2 days was associated with increased odds of generic <italic toggle="yes">E. coli</italic> &#x02265;126 MPN/100 mL (OR: 6.7, 95%CI: 2.6&#x02013;17.6; <xref rid="F1" ref-type="fig">Figure 1</xref>). Conversely, a log<sub>10</sub>-increase in solar radiation 0&#x02013;2 days prior was associated with lower odds of generic <italic toggle="yes">E. 
coli</italic> &#x02265; 126 MPN/100 mL (OR: 0.12, 95%CI: 0.01&#x02013;1.1), although the association was not significant. CRF analysis also ranked rainfall and solar radiation in the previous 0&#x02013;2 days as the most important variables for predicting generic <italic toggle="yes">E. coli</italic> &#x02265;126 MPN/100 mL (<xref rid="F2" ref-type="fig">Figure 2</xref>).</p></sec><sec id="S18"><title>Model Prediction Performance.</title><p id="P31">Model predictions for the test data set outcomes (2015&#x02013;2016) were analyzed with ROC curves (<xref rid="F3" ref-type="fig">Figure 3</xref>), using the AUC to evaluate overall predictive performance. Logistic regression models and CRF models demonstrated comparable discriminatory ability. Logistic regression model AUCs were slightly higher than the CRF AUC for the human-associated outcomes but lower for elevated levels of <italic toggle="yes">E. coli</italic>. Performance was lowest for predicting HF183 detection (AUC: 0.56&#x02013;0.60). Despite the substitution of human-associated FRNA GII coliphage for crAssphage in the HFI variable definition, logistic regression predictions of HFI detection were more accurate (AUC: 0.64) but did not achieve the AUC &#x0003e; 0.7 target conventionally viewed as acceptable predictive performance.<sup><xref rid="R78" ref-type="bibr">78</xref></sup> Models built for predicting generic <italic toggle="yes">E. coli</italic> &#x02265; 126 MPN/100 mL had higher predictive performance (AUC: 0.77&#x02013;0.79) than models for either human-associated outcome, attributable to the high sensitivity (100%) attained at moderate specificities (54&#x02013;64%). However, sensitivity declined rapidly with any further increase in specificity, reflected in the low classification thresholds identified by Youden&#x02019;s <italic toggle="yes">J</italic> statistic at probabilities of 0.08&#x02013;0.11. 
Such low thresholds indicate that the application of more stringent criteria to discriminate between detects and non-detects sharply reduced identification of true positives (of which there were only 6 in the test data set) without providing a corresponding reduction in the false positive rate. Including <italic toggle="yes">E. coli</italic> &#x02265; 126 MPN/100 mL as an additional explanatory variable in the logistic regression and CRF models did not improve predictions of either human-associated outcome (<xref rid="SD1" ref-type="supplementary-material">Figure S3</xref>). Similarly, CRF hyperparameter tuning and imbalance correction did not improve AUC for test data set predictions (<xref rid="SD1" ref-type="supplementary-material">Figure S4</xref>).</p></sec></sec></sec><sec id="S19"><title>DISCUSSION</title><p id="P32">Our results can be used to identify factors associated with human fecal contamination in southeastern US produce irrigation water. However, predictive statistical models should be used with caution in irrigation water quality assessments, as predictions for locations and times beyond those on which the models were trained may be unreliable. In the current study, no model produced accurate out-of-sample predictions of the presence of human fecal indicators in additional ponds from the same growing area sampled in different years. 
Although the negligible influence of nearly all explanatory variables and the dominance of a single, static site characteristic (building proximity) suggest limited opportunity to improve predictions through increased data collection, expanding the training data set with observations at additional locations and times could potentially provide greater generalizability to inform out-of-sample predictions.</p><p id="P33">The most influential factor in detecting molecular human fecal indicators in irrigation water was being located near a building, which, in this rural area, indicates a high likelihood of proximity to a septic system. The soil in this area is rated &#x0201c;very limited&#x0201d; for septic tank absorption fields, meaning septic systems are expected to perform poorly and may introduce human fecal contamination to adjacent environments.<sup><xref rid="R80" ref-type="bibr">80</xref></sup> Recent rainfall was also associated with an increased risk of detecting human fecal indicators and generic <italic toggle="yes">E. coli</italic> &#x02265;126 MPN/100 mL. Increased rainfall in the previous 48 h was the strongest predictor of elevated <italic toggle="yes">E. coli</italic> and was associated with larger, but more variable, increases in odds of HF183 and HFI detection than less recent rainfall. The impact of increased rainfall in the previous 2&#x02013;7 days was smaller in magnitude but more consistently associated with increased odds of HF183 and HFI detection. 
Although the ponds sampled for the training data set were all reported to be fed by surface water, the more consistent association with less recent rainfall suggests that contamination of the subsurface water through septic pollution could be a contributor to human fecal contamination in this growing region.</p><p id="P34">While human MST markers have been reported in produce irrigation water,<sup><xref rid="R45" ref-type="bibr">45</xref>,<xref rid="R81" ref-type="bibr">81</xref>,<xref rid="R82" ref-type="bibr">82</xref></sup> predictors of these markers in irrigation ponds have not previously been characterized. Studies in beach waters have consistently found that precipitation is an important predictor of HF183.<sup><xref rid="R40" ref-type="bibr">40</xref>,<xref rid="R83" ref-type="bibr">83</xref>,<xref rid="R84" ref-type="bibr">84</xref></sup> Rainfall was also significantly associated with HF183 in private well water in Pennsylvania<sup><xref rid="R85" ref-type="bibr">85</xref></sup> and in rural waterways where onsite wastewater treatment was suspected as the source of contamination.<sup><xref rid="R86" ref-type="bibr">86</xref></sup> While other studies have found significant associations between solar radiation and HF183,<sup><xref rid="R40" ref-type="bibr">40</xref></sup> we did not observe human marker associations with solar radiation in this study. Rainfall variables and solar radiation have also been identified as critical factors in modeling unsafe ambient recreational water conditions due to elevated <italic toggle="yes">E. coli</italic>.<sup><xref rid="R87" ref-type="bibr">87</xref>,<xref rid="R88" ref-type="bibr">88</xref></sup> A systematic review of predictive models of <italic toggle="yes">E. 
coli</italic> in beach water found that rainfall was the most frequently included variable in final models.<sup><xref rid="R89" ref-type="bibr">89</xref></sup> The second most commonly included variable was turbidity, which corresponds to more suspended particles in the water column that can provide protection against solar inactivation for particle-associated microorganisms.<sup><xref rid="R90" ref-type="bibr">90</xref></sup> However, turbidity was not associated with any of the fecal indicator outcomes in this study. The frequent presence of algae during sample collection may partially account for the lack of association with turbidity. Algae can interfere with probe-based turbidity measurements and have the potential to both inhibit and stimulate bacterial growth.<sup><xref rid="R91" ref-type="bibr">91</xref></sup> Future studies may consider using more robust laboratory-based turbidity measurements and quantifying algae in surface water samples to address inconsistencies potentially introduced by heavy algal loads.</p><p id="P35">Elevated generic <italic toggle="yes">E. coli</italic> and the presence of HF183 were not correlated in this study. This finding is consistent with previous research, showing that the drivers of human fecal contamination vary from those for generic fecal indicator bacteria.<sup><xref rid="R28" ref-type="bibr">28</xref>,<xref rid="R32" ref-type="bibr">32</xref>,<xref rid="R40" ref-type="bibr">40</xref></sup> Multiple lines of evidence suggest the likely presence of nonhuman fecal contamination, including previous research that identified wildlife- and livestock-shed foodborne pathogens <italic toggle="yes">Campylobacter jejuni</italic>, <italic toggle="yes">Salmonella enterica</italic>, and pathogenic <italic toggle="yes">E. 
coli</italic> in surface waters used for irrigation in this growing region.<sup><xref rid="R8" ref-type="bibr">8</xref>,<xref rid="R92" ref-type="bibr">92</xref>&#x02013;<xref rid="R95" ref-type="bibr">95</xref></sup></p><p id="P36">This study allowed us not only to assess predictors of general and human-specific fecal contamination but also to compare different predictive modeling approaches. Previous studies have suggested that machine learning-based predictive models could be used to determine when pathogens are most likely to be present in irrigation water.<sup><xref rid="R43" ref-type="bibr">43</xref>,<xref rid="R96" ref-type="bibr">96</xref></sup> In particular, conditional random forest models were previously found to more accurately capture relationships with environmental factors to predict <italic toggle="yes">Salmonella</italic> and pathogenic <italic toggle="yes">E. coli</italic> presence in produce irrigation water in northeastern and southwestern US growing regions.<sup><xref rid="R43" ref-type="bibr">43</xref>,<xref rid="R44" ref-type="bibr">44</xref></sup> A comparison of predictive modeling approaches also identified random forest models as the most accurate approach for predicting fecal indicator bacteria in ambient recreational water.<sup><xref rid="R97" ref-type="bibr">97</xref></sup> However, our study observed out-of-sample predictive performance by CRF models for human fecal indicator presence that was only marginally better than chance and slightly inferior to the predictive performance of logistic regression models. 
A recent systematic review of clinical prediction models for a range of binary outcomes likewise found no consistent advantage of random forest and other machine learning approaches over logistic regression.<sup><xref rid="R98" ref-type="bibr">98</xref></sup> Alternative performance metrics to AUC could have yielded different relative performance rankings of the two approaches, but the absolute performance was sufficiently poor that any reasonable metric should have captured the predictive inadequacy of both approaches. Previous comparisons of random forest- and regression-based approaches found that the different methods identified different explanatory variables as important for predicting pathogen presence in irrigation water.<sup><xref rid="R44" ref-type="bibr">44</xref></sup> By contrast, in this study, both regression and CRF approaches identified the same influential explanatory variables for each fecal indicator outcome.</p><p id="P37">A strength of our study was the inclusion of multiple markers of human fecal contamination to address the limitations of the individual markers. HF183 has been shown to cross-react with poultry and dog feces in many settings,<sup><xref rid="R24" ref-type="bibr">24</xref>,<xref rid="R99" ref-type="bibr">99</xref>&#x02013;<xref rid="R101" ref-type="bibr">101</xref></sup> while crAssphage, though less extensively validated, has previously demonstrated superior host specificity.<sup><xref rid="R99" ref-type="bibr">99</xref></sup> Domestic dogs were observed during sample collection at residences near the irrigation ponds, which could have served as a potential source of the HF183 assay cross-reaction. Therefore, we used a conservative detection criterion of two or more positive qPCR replicates. 
CrAssphage may be a less-sensitive human fecal indicator than HF183, though it is often correlated with HF183, as was observed in the present study.<sup><xref rid="R18" ref-type="bibr">18</xref>,<xref rid="R28" ref-type="bibr">28</xref></sup> The human MST markers were codetected too infrequently to develop predictive models of HF183 and crAssphage codetection, but all samples in which human markers were codetected were collected from irrigation ponds near buildings, further suggesting the influence of buildings (with presumed septic systems) on human fecal contamination of irrigation waters.</p><p id="P38">Because crAssphage was not measured in the test data set, we substituted FRNA GII coliphage as the human-associated fecal indicator virus. Though less human-specific than crAssphage,<sup><xref rid="R35" ref-type="bibr">35</xref>,<xref rid="R36" ref-type="bibr">36</xref></sup> FRNA GII coliphage was detected in the test ponds (all close to buildings) at a frequency similar to that of crAssphage detection in the training ponds with nearby buildings. Furthermore, the models developed to predict HF183 and/or crAssphage produced more accurate predictions for HF183 and/or FRNA GII coliphage than the HF183-trained model predictions of HF183 alone, supporting FRNA GII coliphage as a reasonable substitute for crAssphage as a human fecal indicator virus in this setting. 
Future studies should consider the addition of a viral concentration step, such as PEG precipitation or cellulose ester membrane filtration, to increase the recovery of human-associated viral markers and improve the sensitivity of human fecal contamination detection.<sup><xref rid="R18" ref-type="bibr">18</xref>,<xref rid="R27" ref-type="bibr">27</xref></sup></p></sec><sec id="S20"><title>CONCLUSIONS</title><p id="P39">This research demonstrated significantly more human fecal marker intrusion into irrigation ponds in an agricultural region of southwest Georgia when a building was present and with greater rainfall in the previous week. This should be considered when a preharvest water assessment is completed for the introduction of hazards onto produce. Human fecal contamination from nearby buildings should be assessed prior to using an irrigation pond for produce production. Predictive models have previously been suggested for preharvest assessment; however, this study demonstrated that while our modeling approaches were able to determine risk factors, they could not reliably predict water contamination over multiple years. Our findings highlight the continued role for water quality testing, including MST approaches, in protecting the safety of fresh produce.</p></sec><sec sec-type="supplementary-material" id="SM1"><title>Supplementary Material</title><supplementary-material id="SD1" position="float" content-type="local-data"><label>Supplemental</label><media xlink:href="NIHMS2041601-supplement-Supplemental.pdf" id="d67e1072" position="anchor"/></supplementary-material></sec></body><back><ack id="S21"><title>ACKNOWLEDGMENTS</title><p id="P40">We thank Daniel Weller for guidance on analytical methods. Funding for this project was provided by the Center for Produce Safety through a CDFA 2019 Specialty Crop Block Grant Program &#x00026; CPS Campaign for Research and through CDFA SCBGP grant #SBC14060. The table of contents graphic was created with BioRender. 
The use of trade names and names of commercial sources is for identification only and does not imply endorsement by the Centers for Disease Control and Prevention or the U.S. Department of Health and Human Services. The findings and conclusions are those of the authors and do not necessarily represent those of the Centers for Disease Control and Prevention.</p></ack><fn-group><fn id="FN3"><p id="P41">Supporting Information</p><p id="P42">The Supporting Information is available free of charge at <ext-link xlink:href="https://pubs.acs.org/doi/10.1021/acsestwater.4c00839?goto=supporting-info" ext-link-type="uri">https://pubs.acs.org/doi/10.1021/acsestwater.4c00839</ext-link>.</p><p id="P43">Additional building proximity and septic system details; descriptive statistics; univariable odds ratios; predictions of human fecal contamination using <italic toggle="yes">E. coli</italic> as explanatory variable; and hyperparameter tuning and imbalance correction sensitivity analyses (<ext-link xlink:href="https://pubs.acs.org/doi/suppl/10.1021/acsestwater.4c00839/suppl_file/ew4c00839_si_001.pdf" ext-link-type="uri">PDF</ext-link>)</p></fn><fn id="FN4"><p id="P44">Complete contact information is available at: <ext-link xlink:href="https://pubs.acs.org/doi/10.1021/acsestwater.4c00839?ref=pdf" ext-link-type="uri">https://pubs.acs.org/10.1021/acsestwater.4c00839</ext-link></p></fn><fn fn-type="COI-statement" id="FN5"><p id="P45">The authors declare no competing financial interest.</p></fn></fn-group><ref-list><title>REFERENCES</title><ref id="R1"><label>(1)</label><mixed-citation publication-type="journal"><name><surname>Batz</surname><given-names>MB</given-names></name>; <name><surname>Richardson</surname><given-names>LC</given-names></name>; <name><surname>Bazaco</surname><given-names>MC</given-names></name>; <name><surname>Parker</surname><given-names>CC</given-names></name>; <name><surname>Chirtel</surname><given-names>SJ</given-names></name>; 
<name><surname>Cole</surname><given-names>D</given-names></name>; <name><surname>Golden</surname><given-names>NJ</given-names></name>; <name><surname>Griffin</surname><given-names>PM</given-names></name>; <name><surname>Gu</surname><given-names>W</given-names></name>; <name><surname>Schmitt</surname><given-names>SK</given-names></name>; <name><surname>Wolpert</surname><given-names>BJ</given-names></name>; <name><surname>Kufel</surname><given-names>JSZ</given-names></name>; <name><surname>Hoekstra</surname><given-names>RM</given-names></name>
<article-title>Recency-Weighted Statistical Modeling Approach to Attribute Illnesses Caused by 4 Pathogens to Food Sources Using Outbreak Data, United States</article-title>. <source>Emerg. Infect. Dis</source>
<year>2021</year>, <volume>27</volume> (<issue>1</issue>), <fpage>214</fpage>&#x02013;<lpage>222</lpage>.<pub-id pub-id-type="pmid">33350919</pub-id>
</mixed-citation></ref><ref id="R2"><label>(2)</label><mixed-citation publication-type="book"><collab>Interagency Food Safety Analytics Collaboration</collab>. <source>Foodborne Illness Source Attribution Estimates for 2021 for Salmonella, Escherichia coli O157, and Listeria monocytogenes Using Multi-Year Outbreak Surveillance Data, United States</source>; <publisher-name>U.S. Department of Health and Human Services, Centers for Disease Control and Prevention, Food and Drug Administration, U.S. Department of Agriculture&#x02019;s Food Safety and Inspection Service</publisher-name>: <publisher-loc>Atlanta, GA and Washington, D.C.</publisher-loc>, <year>2023</year>. <comment><ext-link xlink:href="https://www.cdc.gov/ifsac/media/pdfs/P19-2021-report-TriAgency-508.pdf" ext-link-type="uri">https://www.cdc.gov/ifsac/media/pdfs/P19-2021-report-TriAgency-508.pdf</ext-link>.</comment></mixed-citation></ref><ref id="R3"><label>(3)</label><mixed-citation publication-type="journal"><name><surname>Steele</surname><given-names>M</given-names></name>; <name><surname>Odumeru</surname><given-names>J</given-names></name>
<article-title>Irrigation Water as Source of Foodborne Pathogens on Fruit and Vegetables</article-title>. <source>J. Food Prot</source>
<year>2004</year>, <volume>67</volume> (<issue>12</issue>), <fpage>2839</fpage>&#x02013;<lpage>2849</lpage>.<pub-id pub-id-type="pmid">15633699</pub-id>
</mixed-citation></ref><ref id="R4"><label>(4)</label><mixed-citation publication-type="journal"><name><surname>Holcomb</surname><given-names>DA</given-names></name>; <name><surname>Stewart</surname><given-names>JR</given-names></name>
<article-title>Microbial Indicators of Fecal Pollution: Recent Progress and Challenges in Assessing Water Quality</article-title>. <source>Curr. Environ. Health Rep</source>
<year>2020</year>, <volume>7</volume> (<issue>3</issue>), <fpage>311</fpage>&#x02013;<lpage>324</lpage>.<pub-id pub-id-type="pmid">32542574</pub-id>
</mixed-citation></ref><ref id="R5"><label>(5)</label><mixed-citation publication-type="webpage"><collab>U.S. Environmental Protection Agency</collab>. <source>Factsheet on Water Quality Parameters: E. coli (Escherichia coli)</source>; <comment>EPA 841F21007F</comment>, <year>2021</year>. <comment><ext-link xlink:href="https://www.epa.gov/system/files/documents/2021-07/parameter-factsheet_e.-coli.pdf" ext-link-type="uri">https://www.epa.gov/system/files/documents/2021-07/parameter-factsheet_e.-coli.pdf</ext-link>.</comment></mixed-citation></ref><ref id="R6"><label>(6)</label><mixed-citation publication-type="book"><collab>California Leafy Greens Marketing Agreement</collab>. <source>Commodity Specific Food Safety Guidelines For the Production and Harvest of Lettuce and Leafy Greens</source>; <publisher-name>Western Growers Association</publisher-name>: <publisher-loc>Irvine, CA</publisher-loc>, <year>2021</year>. <comment><ext-link xlink:href="https://lgma-assets.sfo2.digitaloceanspaces.com/downloads/August-2021-CA-LGMA-Metrics_FINAL-v20211208_A11Y.pdf" ext-link-type="uri">https://lgma-assets.sfo2.digitaloceanspaces.com/downloads/August-2021-CA-LGMA-Metrics_FINAL-v20211208_A11Y.pdf</ext-link>.</comment></mixed-citation></ref><ref id="R7"><label>(7)</label><mixed-citation publication-type="journal"><name><surname>Shelton</surname><given-names>DR</given-names></name>; <name><surname>Karns</surname><given-names>JS</given-names></name>; <name><surname>Coppock</surname><given-names>C</given-names></name>; <name><surname>Patel</surname><given-names>J</given-names></name>; <name><surname>Sharma</surname><given-names>M</given-names></name>; <name><surname>Pachepsky</surname><given-names>YA</given-names></name>
<article-title>Relationship between <italic toggle="yes">eae</italic> and <italic toggle="yes">stx</italic> Virulence Genes and <italic toggle="yes">Escherichia coli</italic> in an Agricultural Watershed: Implications for Irrigation Water Standards and Leafy Green Commodities</article-title>. <source>J. Food Prot</source>
<year>2011</year>, <volume>74</volume> (<issue>1</issue>), <fpage>18</fpage>&#x02013;<lpage>23</lpage>.<pub-id pub-id-type="pmid">21219758</pub-id>
</mixed-citation></ref><ref id="R8"><label>(8)</label><mixed-citation publication-type="journal"><name><surname>Harris</surname><given-names>CS</given-names></name>; <name><surname>Tertuliano</surname><given-names>M</given-names></name>; <name><surname>Rajeev</surname><given-names>S</given-names></name>; <name><surname>Vellidis</surname><given-names>G</given-names></name>; <name><surname>Levy</surname><given-names>K</given-names></name>
<article-title>Impact of Storm Runoff on <italic toggle="yes">Salmonella</italic> and <italic toggle="yes">Escherichia coli</italic> Prevalence in Irrigation Ponds of Fresh Produce Farms in Southern Georgia</article-title>. <source>J. Appl. Microbiol</source>
<year>2018</year>, <volume>124</volume> (<issue>3</issue>), <fpage>910</fpage>&#x02013;<lpage>921</lpage>.<pub-id pub-id-type="pmid">29316043</pub-id>
</mixed-citation></ref><ref id="R9"><label>(9)</label><mixed-citation publication-type="journal"><name><surname>Antaki</surname><given-names>EM</given-names></name>; <name><surname>Vellidis</surname><given-names>G</given-names></name>; <name><surname>Harris</surname><given-names>C</given-names></name>; <name><surname>Aminabadi</surname><given-names>P</given-names></name>; <name><surname>Levy</surname><given-names>K</given-names></name>; <name><surname>Jay-Russell</surname><given-names>MT</given-names></name>
<article-title>Low Concentration of <italic toggle="yes">Salmonella enterica</italic> and Generic <italic toggle="yes">Escherichia coli</italic> in Farm Ponds and Irrigation Distribution Systems Used for Mixed Produce Production in Southern Georgia</article-title>. <source>Foodborne Pathog. Dis</source>
<year>2016</year>, <volume>13</volume> (<issue>10</issue>), <fpage>551</fpage>&#x02013;<lpage>558</lpage>.<pub-id pub-id-type="pmid">27400147</pub-id>
</mixed-citation></ref><ref id="R10"><label>(10)</label><mixed-citation publication-type="journal"><name><surname>Jokinen</surname><given-names>CC</given-names></name>; <name><surname>Hillman</surname><given-names>E</given-names></name>; <name><surname>Tymensen</surname><given-names>L</given-names></name>
<article-title>Sources of Generic <italic toggle="yes">Escherichia coli</italic> and Factors Impacting Guideline Exceedances for Food Safety in an Irrigation Reservoir Outlet and Two Canals</article-title>. <source>Water Res</source>
<year>2019</year>, <volume>156</volume>, <fpage>148</fpage>&#x02013;<lpage>158</lpage>.<pub-id pub-id-type="pmid">30913418</pub-id>
</mixed-citation></ref><ref id="R11"><label>(11)</label><mixed-citation publication-type="journal"><name><surname>Villabruna</surname><given-names>N</given-names></name>; <name><surname>Koopmans</surname><given-names>MPG</given-names></name>; <name><surname>De Graaf</surname><given-names>M</given-names></name>
<article-title>Animals as Reservoir for Human Norovirus</article-title>. <source>Viruses</source>
<year>2019</year>, <volume>11</volume> (<issue>5</issue>), <fpage>478</fpage>.<pub-id pub-id-type="pmid">31130647</pub-id>
</mixed-citation></ref><ref id="R12"><label>(12)</label><mixed-citation publication-type="journal"><name><surname>Aggarwal</surname><given-names>R</given-names></name>; <name><surname>Jameel</surname><given-names>S</given-names></name>
<article-title>Hepatitis E</article-title>. <source>Hepatology</source>
<year>2011</year>, <volume>54</volume> (<issue>6</issue>), <fpage>2218</fpage>&#x02013;<lpage>2226</lpage>.</mixed-citation></ref><ref id="R13"><label>(13)</label><mixed-citation publication-type="journal"><name><surname>Di Cola</surname><given-names>G</given-names></name>; <name><surname>Fantilli</surname><given-names>AC</given-names></name>; <name><surname>Pisano</surname><given-names>MB</given-names></name>; <name><surname>R&#x000e9;</surname><given-names>VE</given-names></name>
<article-title>Foodborne Transmission of Hepatitis A and Hepatitis E Viruses: A Literature Review</article-title>. <source>Int. J. Food Microbiol</source>
<year>2021</year>, <volume>338</volume>, <comment>No. 108986.</comment></mixed-citation></ref><ref id="R14"><label>(14)</label><mixed-citation publication-type="journal"><name><surname>Eberhard</surname><given-names>ML</given-names></name>; <name><surname>Ortega</surname><given-names>YR</given-names></name>; <name><surname>Hanes</surname><given-names>DE</given-names></name>; <name><surname>Nace</surname><given-names>EK</given-names></name>; <name><surname>Quy Do</surname><given-names>R</given-names></name>; <name><surname>Robl</surname><given-names>MG</given-names></name>; <name><surname>Won</surname><given-names>KY</given-names></name>; <name><surname>Gavidia</surname><given-names>C</given-names></name>; <name><surname>Sass</surname><given-names>NL</given-names></name>; <name><surname>Mansfield</surname><given-names>K</given-names></name>; <name><surname>Gozalo</surname><given-names>A</given-names></name>; <name><surname>Griffiths</surname><given-names>J</given-names></name>; <name><surname>Gilman</surname><given-names>R</given-names></name>; <name><surname>Sterling</surname><given-names>CR</given-names></name>; <name><surname>Arrowood</surname><given-names>MJ</given-names></name>
<article-title>Attempts to Establish Experimental <italic toggle="yes">Cyclospora cayetanensis</italic> Infection in Laboratory Animals</article-title>. <source>J. Parasitol</source>
<year>2000</year>, <volume>86</volume> (<issue>3</issue>), <fpage>577</fpage>&#x02013;<lpage>582</lpage>.<pub-id pub-id-type="pmid">10864257</pub-id>
</mixed-citation></ref><ref id="R15"><label>(15)</label><mixed-citation publication-type="journal"><name><surname>Kokkinos</surname><given-names>P</given-names></name>; <name><surname>Kozyra</surname><given-names>I</given-names></name>; <name><surname>Lazic</surname><given-names>S</given-names></name>; <name><surname>S&#x000f6;derberg</surname><given-names>K</given-names></name>; <name><surname>Vasickova</surname><given-names>P</given-names></name>; <name><surname>Bouwknegt</surname><given-names>M</given-names></name>; <name><surname>Rutjes</surname><given-names>S</given-names></name>; <name><surname>Willems</surname><given-names>K</given-names></name>; <name><surname>Moloney</surname><given-names>R</given-names></name>; <name><surname>De Roda Husman</surname><given-names>AM</given-names></name>; <name><surname>Kaupke</surname><given-names>A</given-names></name>; <name><surname>Legaki</surname><given-names>E</given-names></name>; <name><surname>D&#x02019;Agostino</surname><given-names>M</given-names></name>; <name><surname>Cook</surname><given-names>N</given-names></name>; <name><surname>Von Bonsdorff</surname><given-names>C-H</given-names></name>; <name><surname>Rzezutka</surname><given-names>A</given-names></name>; <name><surname>Petrovic</surname><given-names>T</given-names></name>; <name><surname>Maunula</surname><given-names>L</given-names></name>; <name><surname>Pavlik</surname><given-names>I</given-names></name>; <name><surname>Vantarakis</surname><given-names>A</given-names></name>
<article-title>Virological Quality of Irrigation Water in Leafy Green Vegetables and Berry Fruits Production Chains</article-title>. <source>Food Environ. Virol</source>
<year>2017</year>, <volume>9</volume> (<issue>1</issue>), <fpage>72</fpage>&#x02013;<lpage>78</lpage>.<pub-id pub-id-type="pmid">27709435</pub-id>
</mixed-citation></ref><ref id="R16"><label>(16)</label><mixed-citation publication-type="journal"><name><surname>Giangaspero</surname><given-names>A</given-names></name>; <name><surname>Marangi</surname><given-names>M</given-names></name>; <name><surname>Koehler</surname><given-names>AV</given-names></name>; <name><surname>Papini</surname><given-names>R</given-names></name>; <name><surname>Normanno</surname><given-names>G</given-names></name>; <name><surname>Lacasella</surname><given-names>V</given-names></name>; <name><surname>Lonigro</surname><given-names>A</given-names></name>; <name><surname>Gasser</surname><given-names>RB</given-names></name>
<article-title>Molecular Detection of <italic toggle="yes">Cyclospora</italic> in Water, Soil, Vegetables and Humans in Southern Italy Signals a Need for Improved Monitoring by Health Authorities</article-title>. <source>Int. J. Food Microbiol</source>
<year>2015</year>, <volume>211</volume>, <fpage>95</fpage>&#x02013;<lpage>100</lpage>.<pub-id pub-id-type="pmid">26188495</pub-id>
</mixed-citation></ref><ref id="R17"><label>(17)</label><mixed-citation publication-type="journal"><name><surname>Harwood</surname><given-names>VJ</given-names></name>; <name><surname>Staley</surname><given-names>C</given-names></name>; <name><surname>Badgley</surname><given-names>BD</given-names></name>; <name><surname>Borges</surname><given-names>K</given-names></name>; <name><surname>Korajkic</surname><given-names>A</given-names></name>
<article-title>Microbial Source Tracking Markers for Detection of Fecal Contamination in Environmental Waters: Relationships between Pathogens and Human Health Outcomes</article-title>. <source>FEMS Microbiol. Rev</source>
<year>2014</year>, <volume>38</volume> (<issue>1</issue>), <fpage>1</fpage>&#x02013;<lpage>40</lpage>.<pub-id pub-id-type="pmid">23815638</pub-id>
</mixed-citation></ref><ref id="R18"><label>(18)</label><mixed-citation publication-type="journal"><name><surname>Sala-Comorera</surname><given-names>L</given-names></name>; <name><surname>Reynolds</surname><given-names>LJ</given-names></name>; <name><surname>Martin</surname><given-names>NA</given-names></name>; <name><surname>Pascual-Benito</surname><given-names>M</given-names></name>; <name><surname>Stephens</surname><given-names>JH</given-names></name>; <name><surname>Nolan</surname><given-names>TM</given-names></name>; <name><surname>Gitto</surname><given-names>A</given-names></name>; <name><surname>O&#x02019;Hare</surname><given-names>GMP</given-names></name>; <name><surname>O&#x02019;Sullivan</surname><given-names>JJ</given-names></name>; <name><surname>Garc&#x000ed;a-Aljaro</surname><given-names>C</given-names></name>; <name><surname>Meijer</surname><given-names>WG</given-names></name>
<article-title>crAssphage as a Human Molecular Marker to Evaluate Temporal and Spatial Variability in Faecal Contamination of Urban Marine Bathing Waters</article-title>. <source>Sci. Total Environ</source>
<year>2021</year>, <volume>789</volume>, <comment>No. 147828.</comment></mixed-citation></ref><ref id="R19"><label>(19)</label><mixed-citation publication-type="journal"><name><surname>Ahmed</surname><given-names>W</given-names></name>; <name><surname>Gyawali</surname><given-names>P</given-names></name>; <name><surname>Feng</surname><given-names>S</given-names></name>; <name><surname>McLellan</surname><given-names>SL</given-names></name>
<article-title>Host Specificity and Sensitivity of Established and Novel Sewage-Associated Marker Genes in Human and Nonhuman Fecal Samples</article-title>. <source>Appl. Environ. Microbiol</source>
<year>2019</year>, <volume>85</volume> (<issue>14</issue>), <comment>No. e00641-19.</comment></mixed-citation></ref><ref id="R20"><label>(20)</label><mixed-citation publication-type="journal"><name><surname>Boehm</surname><given-names>AB</given-names></name>; <name><surname>Graham</surname><given-names>KE</given-names></name>; <name><surname>Jennings</surname><given-names>WC</given-names></name>
<article-title>Can We Swim Yet? Systematic Review, Meta-Analysis, and Risk Assessment of Aging Sewage in Surface Waters</article-title>. <source>Environ. Sci. Technol</source>
<year>2018</year>, <volume>52</volume> (<issue>17</issue>), <fpage>9634</fpage>&#x02013;<lpage>9645</lpage>.<pub-id pub-id-type="pmid">30080397</pub-id>
</mixed-citation></ref><ref id="R21"><label>(21)</label><mixed-citation publication-type="journal"><name><surname>Bernhard</surname><given-names>AE</given-names></name>; <name><surname>Field</surname><given-names>KG</given-names></name>
<article-title>A PCR Assay To Discriminate Human and Ruminant Feces on the Basis of Host Differences in <italic toggle="yes">Bacteroides-Prevotella</italic> Genes Encoding 16S rRNA</article-title>. <source>Appl. Environ. Microbiol</source>
<year>2000</year>, <volume>66</volume> (<issue>10</issue>), <fpage>4571</fpage>&#x02013;<lpage>4574</lpage>.<pub-id pub-id-type="pmid">11010920</pub-id>
</mixed-citation></ref><ref id="R22"><label>(22)</label><mixed-citation publication-type="journal"><name><surname>Haugland</surname><given-names>RA</given-names></name>; <name><surname>Varma</surname><given-names>M</given-names></name>; <name><surname>Sivaganesan</surname><given-names>M</given-names></name>; <name><surname>Kelty</surname><given-names>C</given-names></name>; <name><surname>Peed</surname><given-names>L</given-names></name>; <name><surname>Shanks</surname><given-names>OC</given-names></name>
<article-title>Evaluation of Genetic Markers from the 16S rRNA Gene V2 Region for Use in Quantitative Detection of Selected Bacteroidales Species and Human Fecal Waste by qPCR</article-title>. <source>Syst. Appl. Microbiol</source>
<year>2010</year>, <volume>33</volume> (<issue>6</issue>), <fpage>348</fpage>&#x02013;<lpage>357</lpage>.<pub-id pub-id-type="pmid">20655680</pub-id>
</mixed-citation></ref><ref id="R23"><label>(23)</label><mixed-citation publication-type="journal"><name><surname>Green</surname><given-names>HC</given-names></name>; <name><surname>Haugland</surname><given-names>RA</given-names></name>; <name><surname>Varma</surname><given-names>M</given-names></name>; <name><surname>Millen</surname><given-names>HT</given-names></name>; <name><surname>Borchardt</surname><given-names>MA</given-names></name>; <name><surname>Field</surname><given-names>KG</given-names></name>; <name><surname>Walters</surname><given-names>WA</given-names></name>; <name><surname>Knight</surname><given-names>R</given-names></name>; <name><surname>Sivaganesan</surname><given-names>M</given-names></name>; <name><surname>Kelty</surname><given-names>CA</given-names></name>; <name><surname>Shanks</surname><given-names>OC</given-names></name>
<article-title>Improved HF183 Quantitative Real-Time PCR Assay for Characterization of Human Fecal Pollution in Ambient Surface Water Samples</article-title>. <source>Appl. Environ. Microbiol</source>
<year>2014</year>, <volume>80</volume> (<issue>10</issue>), <fpage>3086</fpage>&#x02013;<lpage>3094</lpage>.<pub-id pub-id-type="pmid">24610857</pub-id>
</mixed-citation></ref><ref id="R24"><label>(24)</label><mixed-citation publication-type="journal"><name><surname>Layton</surname><given-names>BA</given-names></name>; <name><surname>Cao</surname><given-names>Y</given-names></name>; <name><surname>Ebentier</surname><given-names>DL</given-names></name>; <name><surname>Hanley</surname><given-names>K</given-names></name>; <name><surname>Ballest&#x000e9;</surname><given-names>E</given-names></name>; <name><surname>Brand&#x000e3;o</surname><given-names>J</given-names></name>; <name><surname>Byappanahalli</surname><given-names>M</given-names></name>; <name><surname>Converse</surname><given-names>R</given-names></name>; <name><surname>Farnleitner</surname><given-names>AH</given-names></name>; <name><surname>Gentry-Shields</surname><given-names>J</given-names></name>; <name><surname>Gidley</surname><given-names>ML</given-names></name>; <name><surname>Gourmelon</surname><given-names>M</given-names></name>; <name><surname>Lee</surname><given-names>CS</given-names></name>; <name><surname>Lee</surname><given-names>J</given-names></name>; <name><surname>Lozach</surname><given-names>S</given-names></name>; <name><surname>Madi</surname><given-names>T</given-names></name>; <name><surname>Meijer</surname><given-names>WG</given-names></name>; <name><surname>Noble</surname><given-names>R</given-names></name>; <name><surname>Peed</surname><given-names>L</given-names></name>; <name><surname>Reischer</surname><given-names>GH</given-names></name>; <name><surname>Rodrigues</surname><given-names>R</given-names></name>; <name><surname>Rose</surname><given-names>JB</given-names></name>; <name><surname>Schriewer</surname><given-names>A</given-names></name>; <name><surname>Sinigalliano</surname><given-names>C</given-names></name>; <name><surname>Srinivasan</surname><given-names>S</given-names></name>; <name><surname>Stewart</surname><given-names>J</given-names></name>; <name><surname>Van De Werfhorst</surname><given-names>LC</given-names></name>; <name><surname>Wang</surname><given-names>D</given-names></name>; <name><surname>Whitman</surname><given-names>R</given-names></name>; <name><surname>Wuertz</surname><given-names>S</given-names></name>; <name><surname>Jay</surname><given-names>J</given-names></name>; <name><surname>Holden</surname><given-names>PA</given-names></name>; <name><surname>Boehm</surname><given-names>AB</given-names></name>; <name><surname>Shanks</surname><given-names>O</given-names></name>; <name><surname>Griffith</surname><given-names>JF</given-names></name>
<article-title>Performance of Human Fecal Anaerobe-Associated PCR-Based Assays in a Multi-Laboratory Method Evaluation Study</article-title>. <source>Water Res</source>
<year>2013</year>, <volume>47</volume> (<issue>18</issue>), <fpage>6897</fpage>&#x02013;<lpage>6908</lpage>.<pub-id pub-id-type="pmid">23992621</pub-id>
</mixed-citation></ref><ref id="R25"><label>(25)</label><mixed-citation publication-type="journal"><name><surname>Li</surname><given-names>X</given-names></name>; <name><surname>Sivaganesan</surname><given-names>M</given-names></name>; <name><surname>Kelty</surname><given-names>CA</given-names></name>; <name><surname>Zimmer-Faust</surname><given-names>A</given-names></name>; <name><surname>Clinton</surname><given-names>P</given-names></name>; <name><surname>Reichman</surname><given-names>JR</given-names></name>; <name><surname>Johnson</surname><given-names>Y</given-names></name>; <name><surname>Matthews</surname><given-names>W</given-names></name>; <name><surname>Bailey</surname><given-names>S</given-names></name>; <name><surname>Shanks</surname><given-names>OC</given-names></name>
<article-title>Large-Scale Implementation of Standardized Quantitative Real-Time PCR Fecal Source Identification Procedures in the Tillamook Bay Watershed</article-title>. <source>PLoS One</source>
<year>2019</year>, <volume>14</volume> (<issue>6</issue>), <comment>No. e0216827</comment>.</mixed-citation></ref><ref id="R26"><label>(26)</label><mixed-citation publication-type="journal"><name><surname>Stachler</surname><given-names>E</given-names></name>; <name><surname>Kelty</surname><given-names>C</given-names></name>; <name><surname>Sivaganesan</surname><given-names>M</given-names></name>; <name><surname>Li</surname><given-names>X</given-names></name>; <name><surname>Bibby</surname><given-names>K</given-names></name>; <name><surname>Shanks</surname><given-names>OC</given-names></name>
<article-title>Quantitative crAssphage PCR Assays for Human Fecal Pollution Measurement</article-title>. <source>Environ. Sci. Technol</source>
<year>2017</year>, <volume>51</volume> (<issue>16</issue>), <fpage>9146</fpage>&#x02013;<lpage>9154</lpage>.<pub-id pub-id-type="pmid">28700235</pub-id>
</mixed-citation></ref><ref id="R27"><label>(27)</label><mixed-citation publication-type="journal"><name><surname>Sabar</surname><given-names>MA</given-names></name>; <name><surname>Honda</surname><given-names>R</given-names></name>; <name><surname>Haramoto</surname><given-names>E</given-names></name>
<article-title>CrAssphage as an Indicator of Human-Fecal Contamination in Water Environment and Virus Reduction in Wastewater Treatment</article-title>. <source>Water Res</source>
<year>2022</year>, <volume>221</volume>, <comment>No. 118827.</comment></mixed-citation></ref><ref id="R28"><label>(28)</label><mixed-citation publication-type="journal"><name><surname>Jennings</surname><given-names>WC</given-names></name>; <name><surname>G&#x000e1;lvez-Arango</surname><given-names>E</given-names></name>; <name><surname>Prieto</surname><given-names>AL</given-names></name>; <name><surname>Boehm</surname><given-names>AB</given-names></name>
<article-title>CrAssphage for Fecal Source Tracking in Chile: Covariation with Norovirus, HF183, and Bacterial Indicators</article-title>. <source>Water Res. X</source>
<year>2020</year>, <volume>9</volume>, <comment>No. 100071.</comment></mixed-citation></ref><ref id="R29"><label>(29)</label><mixed-citation publication-type="journal"><name><surname>Nguyen</surname><given-names>KH</given-names></name>; <name><surname>Smith</surname><given-names>S</given-names></name>; <name><surname>Roundtree</surname><given-names>A</given-names></name>; <name><surname>Feistel</surname><given-names>DJ</given-names></name>; <name><surname>Kirby</surname><given-names>AE</given-names></name>; <name><surname>Levy</surname><given-names>K</given-names></name>; <name><surname>Mattioli</surname><given-names>MC</given-names></name>
<article-title>Fecal Indicators and Antibiotic Resistance Genes Exhibit Diurnal Trends in the Chattahoochee River: Implications for Water Quality Monitoring</article-title>. <source>Front. Microbiol</source>
<year>2022</year>, <volume>13</volume>, <comment>No. 1029176.</comment></mixed-citation></ref><ref id="R30"><label>(30)</label><mixed-citation publication-type="journal"><name><surname>Staley</surname><given-names>ZR</given-names></name>; <name><surname>Vogel</surname><given-names>L</given-names></name>; <name><surname>Robinson</surname><given-names>C</given-names></name>; <name><surname>Edge</surname><given-names>TA</given-names></name>
<article-title>Differential Occurrence of <italic toggle="yes">Escherichia coli</italic> and Human Bacteroidales at Two Great Lakes Beaches</article-title>. <source>J. Great Lakes Res</source>
<year>2015</year>, <volume>41</volume> (<issue>2</issue>), <fpage>530</fpage>&#x02013;<lpage>535</lpage>.</mixed-citation></ref><ref id="R31"><label>(31)</label><mixed-citation publication-type="journal"><name><surname>Stachler</surname><given-names>E</given-names></name>; <name><surname>Akyon</surname><given-names>B</given-names></name>; <name><surname>De Carvalho</surname><given-names>NA</given-names></name>; <name><surname>Ference</surname><given-names>C</given-names></name>; <name><surname>Bibby</surname><given-names>K</given-names></name>
<article-title>Correlation of crAssphage qPCR Markers with Culturable and Molecular Indicators of Human Fecal Pollution in an Impacted Urban Watershed</article-title>. <source>Environ. Sci. Technol</source>
<year>2018</year>, <volume>52</volume> (<issue>13</issue>), <fpage>7505</fpage>&#x02013;<lpage>7512</lpage>.<pub-id pub-id-type="pmid">29874457</pub-id>
</mixed-citation></ref><ref id="R32"><label>(32)</label><mixed-citation publication-type="journal"><name><surname>Shahin</surname><given-names>SA</given-names></name>; <name><surname>Keevy</surname><given-names>H</given-names></name>; <name><surname>Dada</surname><given-names>AC</given-names></name>; <name><surname>Gyawali</surname><given-names>P</given-names></name>; <name><surname>Sherchan</surname><given-names>SP</given-names></name>
<article-title>Incidence of Human Associated HF183 <italic toggle="yes">Bacteroides</italic> Marker and <italic toggle="yes">E. coli</italic> Levels in New Orleans Canals</article-title>. <source>Sci. Total Environ</source>
<year>2022</year>, <volume>806</volume> (<issue>1</issue>), <comment>No. 150356.</comment></mixed-citation></ref><ref id="R33"><label>(33)</label><mixed-citation publication-type="journal"><name><surname>Nappier</surname><given-names>SP</given-names></name>; <name><surname>Hong</surname><given-names>T</given-names></name>; <name><surname>Ichida</surname><given-names>A</given-names></name>; <name><surname>Goldstone</surname><given-names>A</given-names></name>; <name><surname>Eftim</surname><given-names>SE</given-names></name>
<article-title>Occurrence of Coliphage in Raw Wastewater and in Ambient Water: A Meta-Analysis</article-title>. <source>Water Res</source>
<year>2019</year>, <volume>153</volume>, <fpage>263</fpage>&#x02013;<lpage>273</lpage>.<pub-id pub-id-type="pmid">30735956</pub-id>
</mixed-citation></ref><ref id="R34"><label>(34)</label><mixed-citation publication-type="journal"><name><surname>Havelaar</surname><given-names>AH</given-names></name>; <name><surname>Pot-Hogeboom</surname><given-names>WM</given-names></name>; <name><surname>Furuse</surname><given-names>K</given-names></name>; <name><surname>Pot</surname><given-names>R</given-names></name>; <name><surname>Hormann</surname><given-names>MP</given-names></name>
<article-title>F-specific RNA Bacteriophages and Sensitive Host Strains in Faeces and Wastewater of Human and Animal Origin</article-title>. <source>J. Appl. Bacteriol</source>
<year>1990</year>, <volume>69</volume> (<issue>1</issue>), <fpage>30</fpage>&#x02013;<lpage>37</lpage>.<pub-id pub-id-type="pmid">2204615</pub-id>
</mixed-citation></ref><ref id="R35"><label>(35)</label><mixed-citation publication-type="journal"><name><surname>Schaper</surname><given-names>M</given-names></name>; <name><surname>Jofre</surname><given-names>J</given-names></name>; <name><surname>Uys</surname><given-names>M</given-names></name>; <name><surname>Grabow</surname><given-names>WOK</given-names></name>
<article-title>Distribution of Genotypes of F-Specific RNA Bacteriophages in Human and Non-Human Sources of Faecal Pollution in South Africa and Spain</article-title>. <source>J. Appl. Microbiol</source>
<year>2002</year>, <volume>92</volume> (<issue>4</issue>), <fpage>657</fpage>&#x02013;<lpage>667</lpage>.<pub-id pub-id-type="pmid">11966906</pub-id>
</mixed-citation></ref><ref id="R36"><label>(36)</label><mixed-citation publication-type="journal"><name><surname>Stewart-Pullaro</surname><given-names>J</given-names></name>; <name><surname>Daugomah</surname><given-names>JW</given-names></name>; <name><surname>Chestnut</surname><given-names>DE</given-names></name>; <name><surname>Graves</surname><given-names>DA</given-names></name>; <name><surname>Sobsey</surname><given-names>MD</given-names></name>; <name><surname>Scott</surname><given-names>GI</given-names></name>
<article-title>F<sup>+</sup> RNA Coliphage Typing for Microbial Source Tracking in Surface Waters</article-title>. <source>J. Appl. Microbiol</source>
<year>2006</year>, <volume>101</volume> (<issue>5</issue>), <fpage>1015</fpage>&#x02013;<lpage>1026</lpage>.<pub-id pub-id-type="pmid">17040225</pub-id>
</mixed-citation></ref><ref id="R37"><label>(37)</label><mixed-citation publication-type="journal"><name><surname>Sowah</surname><given-names>RA</given-names></name>; <name><surname>Molina</surname><given-names>M</given-names></name>; <name><surname>Georgacopoulos</surname><given-names>O</given-names></name>; <name><surname>Snyder</surname><given-names>B</given-names></name>; <name><surname>Cyterski</surname><given-names>M</given-names></name>
<article-title>Sources and Drivers of ARGs in Urban Streams in Atlanta, Georgia</article-title>. <source>Microorganisms</source>
<year>2022</year>, <volume>10</volume> (<issue>9</issue>), <fpage>1804</fpage>.<pub-id pub-id-type="pmid">36144405</pub-id>
</mixed-citation></ref><ref id="R38"><label>(38)</label><mixed-citation publication-type="journal"><name><surname>Bihn</surname><given-names>EA</given-names></name>; <name><surname>Mangione</surname><given-names>KJ</given-names></name>; <name><surname>Lyons</surname><given-names>B</given-names></name>; <name><surname>Wszelaki</surname><given-names>AL</given-names></name>; <name><surname>Churey</surname><given-names>JJ</given-names></name>; <name><surname>Stoeckel</surname><given-names>DM</given-names></name>; <name><surname>Worobo</surname><given-names>RW</given-names></name>
<article-title>Development of an Irrigation Water Quality Database to Identify Water Resources and Assess Microbiological Risks During the Production of Fresh Fruits and Vegetables</article-title>. <source>Front. Water</source>
<year>2021</year>, <volume>3</volume>, <comment>No. 741653.</comment></mixed-citation></ref><ref id="R39"><label>(39)</label><mixed-citation publication-type="journal"><name><surname>McKee</surname><given-names>BA</given-names></name>; <name><surname>Molina</surname><given-names>M</given-names></name>; <name><surname>Cyterski</surname><given-names>M</given-names></name>; <name><surname>Couch</surname><given-names>A</given-names></name>
<article-title>Microbial Source Tracking (MST) in Chattahoochee River National Recreation Area: Seasonal and Precipitation Trends in MST Marker Concentrations, and Associations with <italic toggle="yes">E. coli</italic> Levels, Pathogenic Marker Presence, and Land Use</article-title>. <source>Water Res</source>
<year>2020</year>, <volume>171</volume>, <comment>No. 115435.</comment></mixed-citation></ref><ref id="R40"><label>(40)</label><mixed-citation publication-type="journal"><name><surname>Jennings</surname><given-names>WC</given-names></name>; <name><surname>Chern</surname><given-names>EC</given-names></name>; <name><surname>O&#x02019;Donohue</surname><given-names>D</given-names></name>; <name><surname>Kellogg</surname><given-names>MG</given-names></name>; <name><surname>Boehm</surname><given-names>AB</given-names></name>
<article-title>Frequent Detection of a Human Fecal Indicator in the Urban Ocean: Environmental Drivers and Covariation with Enterococci</article-title>. <source>Environ. Sci.: Processes Impacts</source>
<year>2018</year>, <volume>20</volume> (<issue>3</issue>), <fpage>480</fpage>&#x02013;<lpage>492</lpage>.</mixed-citation></ref><ref id="R41"><label>(41)</label><mixed-citation publication-type="journal"><name><surname>Staley</surname><given-names>C</given-names></name>; <name><surname>Reckhow</surname><given-names>KH</given-names></name>; <name><surname>Lukasik</surname><given-names>J</given-names></name>; <name><surname>Harwood</surname><given-names>VJ</given-names></name>
<article-title>Assessment of Sources of Human Pathogens and Fecal Contamination in a Florida Freshwater Lake</article-title>. <source>Water Res</source>
<year>2012</year>, <volume>46</volume> (<issue>17</issue>), <fpage>5799</fpage>&#x02013;<lpage>5812</lpage>.<pub-id pub-id-type="pmid">22939220</pub-id>
</mixed-citation></ref><ref id="R42"><label>(42)</label><mixed-citation publication-type="journal"><name><surname>Weller</surname><given-names>DL</given-names></name>; <name><surname>Love</surname><given-names>TMT</given-names></name>; <name><surname>Wiedmann</surname><given-names>M</given-names></name>
<article-title>Interpretability Versus Accuracy: A Comparison of Machine Learning Models Built Using Different Algorithms, Performance Measures, and Features to Predict <italic toggle="yes">E. coli</italic> Levels in Agricultural Water</article-title>. <source>Front. Artif. Intell</source>
<year>2021</year>, <volume>4</volume>, <comment>No. 628441.</comment></mixed-citation></ref><ref id="R43"><label>(43)</label><mixed-citation publication-type="journal"><name><surname>Weller</surname><given-names>DL</given-names></name>; <name><surname>Love</surname><given-names>TMT</given-names></name>; <name><surname>Belias</surname><given-names>A</given-names></name>; <name><surname>Wiedmann</surname><given-names>M</given-names></name>
<article-title>Predictive Models May Complement or Provide an Alternative to Existing Strategies for Assessing the Enteric Pathogen Contamination Status of Northeastern Streams Used to Provide Water for Produce Production</article-title>. <source>Front. Sustainable Food Syst</source>
<year>2020</year>, <volume>4</volume>, <comment>No. 561517.</comment></mixed-citation></ref><ref id="R44"><label>(44)</label><mixed-citation publication-type="journal"><name><surname>Belias</surname><given-names>A</given-names></name>; <name><surname>Brassill</surname><given-names>N</given-names></name>; <name><surname>Roof</surname><given-names>S</given-names></name>; <name><surname>Rock</surname><given-names>C</given-names></name>; <name><surname>Wiedmann</surname><given-names>M</given-names></name>; <name><surname>Weller</surname><given-names>D</given-names></name>
<article-title>Cross-Validation Indicates Predictive Models May Provide an Alternative to Indicator Organism Monitoring for Evaluating Pathogen Presence in Southwestern US Agricultural Water</article-title>. <source>Front. Water</source>
<year>2021</year>, <volume>3</volume>, <comment>No. 693631.</comment></mixed-citation></ref><ref id="R45"><label>(45)</label><mixed-citation publication-type="journal"><name><surname>Green</surname><given-names>H</given-names></name>; <name><surname>Wilder</surname><given-names>M</given-names></name>; <name><surname>Wiedmann</surname><given-names>M</given-names></name>; <name><surname>Weller</surname><given-names>D</given-names></name>
<article-title>Integrative Survey of 68 Non-Overlapping Upstate New York Watersheds Reveals Stream Features Associated With Aquatic Fecal Contamination</article-title>. <source>Front. Microbiol</source>
<year>2021</year>, <volume>12</volume>, <comment>No. 684533.</comment></mixed-citation></ref><ref id="R46"><label>(46)</label><mixed-citation publication-type="book"><name><surname>Griffith</surname><given-names>GE</given-names></name>; <name><surname>Omernik</surname><given-names>JM</given-names></name>; <name><surname>Comstock</surname><given-names>JA</given-names></name>; <name><surname>Lawrence</surname><given-names>S</given-names></name>; <name><surname>Martin</surname><given-names>G</given-names></name>; <name><surname>Goddard</surname><given-names>A</given-names></name>; <name><surname>Hulcher</surname><given-names>VJ</given-names></name>; <name><surname>Foster</surname><given-names>T</given-names></name>
<source>Ecoregions of Alabama and Georgia (Color Poster with Map, Descriptive Text, Summary Tables, and Photographs)</source>; <publisher-name>U.S. Geological Survey</publisher-name>: <publisher-loc>Reston, VA</publisher-loc>, <year>2001</year>. <comment><ext-link xlink:href="https://www.epa.gov/eco-research/ecoregion-download-files-state-region-4" ext-link-type="uri">https://www.epa.gov/eco-research/ecoregion-download-files-state-region-4</ext-link>.</comment></mixed-citation></ref><ref id="R47"><label>(47)</label><mixed-citation publication-type="journal"><name><surname>Sullivan</surname><given-names>DG</given-names></name>; <name><surname>Batten</surname><given-names>HL</given-names></name>; <name><surname>Bosch</surname><given-names>D</given-names></name>; <name><surname>Sheridan</surname><given-names>J</given-names></name>; <name><surname>Strickland</surname><given-names>T</given-names></name>
<article-title>Little River Experimental Watershed, Tifton, Georgia, United States: A Geographic Database</article-title>. <source>Water Resour. Res</source>
<year>2007</year>, <volume>43</volume> (<issue>9</issue>), <comment>No. 2006WR005836.</comment></mixed-citation></ref><ref id="R48"><label>(48)</label><mixed-citation publication-type="journal"><name><surname>Kahler</surname><given-names>AM</given-names></name>; <name><surname>Hofstetter</surname><given-names>J</given-names></name>; <name><surname>Arrowood</surname><given-names>M</given-names></name>; <name><surname>Peterson</surname><given-names>A</given-names></name>; <name><surname>Jacobson</surname><given-names>D</given-names></name>; <name><surname>Barratt</surname><given-names>J</given-names></name>; <name><surname>da Silva</surname><given-names>ALBR</given-names></name>; <name><surname>Rodrigues</surname><given-names>C</given-names></name>; <name><surname>Mattioli</surname><given-names>MC</given-names></name>
<article-title>Sources and Prevalence of <italic toggle="yes">Cyclospora cayetanensis</italic> in Southeastern U.S. Growing Environments</article-title>. <source>J. Food Prot</source>
<year>2024</year>, <volume>87</volume>, <comment>No. 100309.</comment></mixed-citation></ref><ref id="R49"><label>(49)</label><mixed-citation publication-type="book"><name><surname>Kahler</surname><given-names>AM</given-names></name>; <name><surname>Hill</surname><given-names>VR</given-names></name>
<part-title>Detection of <italic toggle="yes">Cryptosporidium</italic> Recovered from Large-Volume Water Samples Using Dead-End Ultrafiltration</part-title>. In <source>Cryptosporidium</source>; <name><surname>Mead</surname><given-names>JR</given-names></name>; <name><surname>Arrowood</surname><given-names>MJ</given-names></name>, Eds.; <comment>Methods in Molecular Biology</comment>; <publisher-name>Humana</publisher-name>: <publisher-loc>New York, NY</publisher-loc>, <year>2020</year>; Vol. <volume>2052</volume>.</mixed-citation></ref><ref id="R50"><label>(50)</label><mixed-citation publication-type="book"><collab>U.S. Environmental Protection Agency</collab>. <source>Method 1696: Characterization of Human Fecal Pollution in Water by TaqMan Quantitative Polymerase Chain Reaction (qPCR) Assay; EPA 821-R-19-002</source>; <publisher-name>U.S. EPA Office of Research and Development</publisher-name>: <publisher-loc>Cincinnati, OH</publisher-loc>, <year>2019</year>. <comment><ext-link xlink:href="https://www.epa.gov/sites/default/files/2019-03/documents/method_1696_draft_2019.pdf" ext-link-type="uri">https://www.epa.gov/sites/default/files/2019-03/documents/method_1696_draft_2019.pdf</ext-link>.</comment></mixed-citation></ref><ref id="R51"><label>(51)</label><mixed-citation publication-type="book"><name><surname>Hill</surname><given-names>V</given-names></name>; <name><surname>Vellidis</surname><given-names>G</given-names></name>; <name><surname>Levy</surname><given-names>K</given-names></name>
<source>Improved Sampling and Analytical Methods for Testing Agricultural Water for Pathogens, Surrogates and Source Tracking Indicators</source>; <publisher-name>Center for Produce Safety</publisher-name>, <year>2017</year>. <comment><ext-link xlink:href="https://www.centerforproducesafety.org/assets/research-database/Hill-2014-Final-Report.pdf" ext-link-type="uri">https://www.centerforproducesafety.org/assets/research-database/Hill-2014-Final-Report.pdf</ext-link>.</comment></mixed-citation></ref><ref id="R52"><label>(52)</label><mixed-citation publication-type="book"><collab>U.S. Environmental Protection Agency</collab>. <source>Method 1602: Male-Specific (F+) and Somatic Coliphage in Water by Single Agar Layer (SAL) Procedure; EPA 821-R-01-029</source>; <publisher-name>U.S. EPA Office of Water</publisher-name>: <publisher-loc>Washington, DC</publisher-loc>, <year>2001</year>. <comment><ext-link xlink:href="https://www.epa.gov/sites/default/files/2015-12/documents/method_1602_2001.pdf" ext-link-type="uri">https://www.epa.gov/sites/default/files/2015-12/documents/method_1602_2001.pdf</ext-link>.</comment></mixed-citation></ref><ref id="R53"><label>(53)</label><mixed-citation publication-type="journal"><name><surname>Friedman</surname><given-names>SD</given-names></name>; <name><surname>Cooper</surname><given-names>EM</given-names></name>; <name><surname>Calci</surname><given-names>KR</given-names></name>; <name><surname>Genthner</surname><given-names>FJ</given-names></name>
<article-title>Design and Assessment of a Real Time Reverse Transcription-PCR Method to Genotype Single-Stranded RNA Male-Specific Coliphages (Family Leviviridae)</article-title>. <source>J. Virol. Methods</source>
<year>2011</year>, <volume>173</volume> (<issue>2</issue>), <fpage>196</fpage>&#x02013;<lpage>202</lpage>.<pub-id pub-id-type="pmid">21320531</pub-id>
</mixed-citation></ref><ref id="R54"><label>(54)</label><mixed-citation publication-type="journal"><name><surname>Polaczyk</surname><given-names>AL</given-names></name>; <name><surname>Narayanan</surname><given-names>J</given-names></name>; <name><surname>Cromeans</surname><given-names>TL</given-names></name>; <name><surname>Hahn</surname><given-names>D</given-names></name>; <name><surname>Roberts</surname><given-names>JM</given-names></name>; <name><surname>Amburgey</surname><given-names>JE</given-names></name>; <name><surname>Hill</surname><given-names>VR</given-names></name>
<article-title>Ultrafiltration-Based Techniques for Rapid and Simultaneous Concentration of Multiple Microbe Classes from 100-L Tap Water Samples</article-title>. <source>J. Microbiol. Methods</source>
<year>2008</year>, <volume>73</volume> (<issue>2</issue>), <fpage>92</fpage>&#x02013;<lpage>99</lpage>.<pub-id pub-id-type="pmid">18395278</pub-id>
</mixed-citation></ref><ref id="R55"><label>(55)</label><mixed-citation publication-type="journal"><name><surname>Hill</surname><given-names>VR</given-names></name>; <name><surname>Narayanan</surname><given-names>J</given-names></name>; <name><surname>Gallen</surname><given-names>RR</given-names></name>; <name><surname>Ferdinand</surname><given-names>KL</given-names></name>; <name><surname>Cromeans</surname><given-names>T</given-names></name>; <name><surname>Vinj&#x000e9;</surname><given-names>J</given-names></name>
<article-title>Development of a Nucleic Acid Extraction Procedure for Simultaneous Recovery of DNA and RNA from Diverse Microbes in Water</article-title>. <source>Pathogens</source>
<year>2015</year>, <volume>4</volume> (<issue>2</issue>), <fpage>335</fpage>&#x02013;<lpage>354</lpage>.<pub-id pub-id-type="pmid">26016775</pub-id>
</mixed-citation></ref><ref id="R56"><label>(56)</label><mixed-citation publication-type="book"><collab>University of Georgia</collab>. <source>UGA Weather&#x02014;Automated Environmental Monitoring Network Page</source>; <publisher-name>UGA Weather Network</publisher-name>, <year>2023</year>. <comment><ext-link xlink:href="http://www.georgiaweather.net/?variable=HI&#x00026;site=TYTY" ext-link-type="uri">http://www.georgiaweather.net/?variable=HI&#x00026;site=TYTY</ext-link>.</comment></mixed-citation></ref><ref id="R57"><label>(57)</label><mixed-citation publication-type="book"><collab>National Water and Climate Center</collab>. <source>Little River-Site Information and Reports; Soil Climate Analysis Network; Site 2027</source>; <publisher-name>Natural Resources Conservation Service, United States Department of Agriculture</publisher-name>, <year>2023</year>. <comment><ext-link xlink:href="https://wcc.sc.egov.usda.gov/nwcc/site?sitenum=2027" ext-link-type="uri">https://wcc.sc.egov.usda.gov/nwcc/site?sitenum=2027</ext-link>.</comment></mixed-citation></ref><ref id="R58"><label>(58)</label><mixed-citation publication-type="journal"><name><surname>Wiesner-Friedman</surname><given-names>C</given-names></name>; <name><surname>Beattie</surname><given-names>RE</given-names></name>; <name><surname>Stewart</surname><given-names>JR</given-names></name>; <name><surname>Hristova</surname><given-names>KR</given-names></name>; <name><surname>Serre</surname><given-names>ML</given-names></name>
<article-title>Microbial Find, Inform, and Test Model for Identifying Spatially Distributed Contamination Sources: Framework Foundation and Demonstration of Ruminant <italic toggle="yes">Bacteroides</italic> Abundance in River Sediments</article-title>. <source>Environ. Sci. Technol</source>
<year>2021</year>, <volume>55</volume> (<issue>15</issue>), <fpage>10451</fpage>&#x02013;<lpage>10461</lpage>.<pub-id pub-id-type="pmid">34291905</pub-id>
</mixed-citation></ref><ref id="R59"><label>(59)</label><mixed-citation publication-type="journal"><name><surname>Holcomb</surname><given-names>DA</given-names></name>; <name><surname>Messier</surname><given-names>KP</given-names></name>; <name><surname>Serre</surname><given-names>ML</given-names></name>; <name><surname>Rowny</surname><given-names>JG</given-names></name>; <name><surname>Stewart</surname><given-names>JR</given-names></name>
<article-title>Geostatistical Prediction of Microbial Water Quality throughout a Stream Network Using Meteorology, Land Cover, and Spatiotemporal Autocorrelation</article-title>. <source>Environ. Sci. Technol</source>
<year>2018</year>, <volume>52</volume> (<issue>14</issue>), <fpage>7775</fpage>&#x02013;<lpage>7784</lpage>.<pub-id pub-id-type="pmid">29886747</pub-id>
</mixed-citation></ref><ref id="R60"><label>(60)</label><mixed-citation publication-type="webpage"><collab>Georgia Department of Public Health</collab>. <source>Manual for On-Site Sewage Management Systems</source>, <year>2019</year>. <comment><ext-link xlink:href="https://dph.georgia.gov/document/document/manual-site-sewage-management-systems-rules/download" ext-link-type="uri">https://dph.georgia.gov/document/document/manual-site-sewage-management-systems-rules/download</ext-link>.</comment></mixed-citation></ref><ref id="R61"><label>(61)</label><mixed-citation publication-type="book"><collab>R Core Team</collab>. <source>R: A Language and Environment for Statistical Computing</source>; <publisher-name>R Foundation for Statistical Computing</publisher-name>: <publisher-loc>Vienna, Austria</publisher-loc>, <year>2024</year>. <comment><ext-link xlink:href="https://www.R-project.org/" ext-link-type="uri">https://www.R-project.org/</ext-link>.</comment></mixed-citation></ref><ref id="R62"><label>(62)</label><mixed-citation publication-type="journal"><name><surname>Bates</surname><given-names>D</given-names></name>; <name><surname>M&#x000e4;chler</surname><given-names>M</given-names></name>; <name><surname>Bolker</surname><given-names>B</given-names></name>; <name><surname>Walker</surname><given-names>S</given-names></name>
<article-title>Fitting Linear Mixed-Effects Models Using lme4</article-title>. <source>J. Stat. Softw</source>
<year>2015</year>, <volume>67</volume> (<issue>1</issue>), <fpage>1</fpage>&#x02013;<lpage>48</lpage>.</mixed-citation></ref><ref id="R63"><label>(63)</label><mixed-citation publication-type="journal"><name><surname>Hothorn</surname><given-names>T</given-names></name>; <name><surname>Hornik</surname><given-names>K</given-names></name>; <name><surname>Zeileis</surname><given-names>A</given-names></name>
<article-title>Unbiased Recursive Partitioning: A Conditional Inference Framework</article-title>. <source>J. Comput. Graph. Stat</source>
<year>2006</year>, <volume>15</volume> (<issue>3</issue>), <fpage>651</fpage>&#x02013;<lpage>674</lpage>.</mixed-citation></ref><ref id="R64"><label>(64)</label><mixed-citation publication-type="journal"><name><surname>Strobl</surname><given-names>C</given-names></name>; <name><surname>Boulesteix</surname><given-names>A-L</given-names></name>; <name><surname>Zeileis</surname><given-names>A</given-names></name>; <name><surname>Hothorn</surname><given-names>T</given-names></name>
<article-title>Bias in Random Forest Variable Importance Measures: Illustrations, Sources and a Solution</article-title>. <source>BMC Bioinf</source>
<year>2007</year>, <volume>8</volume> (<issue>1</issue>), <fpage>25</fpage>.</mixed-citation></ref><ref id="R65"><label>(65)</label><mixed-citation publication-type="journal"><name><surname>Strobl</surname><given-names>C</given-names></name>; <name><surname>Malley</surname><given-names>J</given-names></name>; <name><surname>Tutz</surname><given-names>G</given-names></name>
<article-title>An Introduction to Recursive Partitioning: Rationale, Application, and Characteristics of Classification and Regression Trees, Bagging, and Random Forests</article-title>. <source>Psychol. Methods</source>
<year>2009</year>, <volume>14</volume> (<issue>4</issue>), <fpage>323</fpage>&#x02013;<lpage>348</lpage>.<pub-id pub-id-type="pmid">19968396</pub-id>
</mixed-citation></ref><ref id="R66"><label>(66)</label><mixed-citation publication-type="journal"><name><surname>Bischl</surname><given-names>B</given-names></name>; <name><surname>Lang</surname><given-names>M</given-names></name>; <name><surname>Kotthoff</surname><given-names>L</given-names></name>; <name><surname>Schiffner</surname><given-names>J</given-names></name>; <name><surname>Richter</surname><given-names>J</given-names></name>; <name><surname>Studerus</surname><given-names>E</given-names></name>; <name><surname>Casalicchio</surname><given-names>G</given-names></name>; <name><surname>Jones</surname><given-names>ZM</given-names></name>
<article-title>mlr: Machine Learning in R</article-title>. <source>J. Mach. Learn. Res</source>
<year>2016</year>, <volume>17</volume> (<issue>170</issue>), <fpage>1</fpage>&#x02013;<lpage>5</lpage>.</mixed-citation></ref><ref id="R67"><label>(67)</label><mixed-citation publication-type="journal"><name><surname>Chawla</surname><given-names>NV</given-names></name>; <name><surname>Bowyer</surname><given-names>KW</given-names></name>; <name><surname>Hall</surname><given-names>LO</given-names></name>; <name><surname>Kegelmeyer</surname><given-names>WP</given-names></name>
<article-title>SMOTE: Synthetic Minority Over-sampling Technique</article-title>. <source>J. Artif. Intell. Res</source>
<year>2002</year>, <volume>16</volume>, <fpage>321</fpage>&#x02013;<lpage>357</lpage>.</mixed-citation></ref><ref id="R68"><label>(68)</label><mixed-citation publication-type="book"><name><surname>Bischl</surname><given-names>B</given-names></name>; <name><surname>K&#x000fc;hn</surname><given-names>T</given-names></name>; <name><surname>Szepannek</surname><given-names>G</given-names></name>
<part-title>On Class Imbalance Correction for Classification Algorithms in Credit Scoring</part-title>. In <source>Operations Research Proceedings 2014</source>; <name><surname>L&#x000fc;bbecke</surname><given-names>M</given-names></name>; <name><surname>Koster</surname><given-names>A</given-names></name>; <name><surname>Letmathe</surname><given-names>P</given-names></name>; <name><surname>Madlener</surname><given-names>R</given-names></name>; <name><surname>Peis</surname><given-names>B</given-names></name>; <name><surname>Walther</surname><given-names>G</given-names></name>, Eds.; <comment>Operations Research Proceedings</comment>; <publisher-name>Springer International Publishing</publisher-name>: <publisher-loc>Cham</publisher-loc>, <year>2016</year>; pp <fpage>37</fpage>&#x02013;<lpage>43</lpage>.</mixed-citation></ref><ref id="R69"><label>(69)</label><mixed-citation publication-type="journal"><name><surname>Weller</surname><given-names>DL</given-names></name>; <name><surname>Love</surname><given-names>TMT</given-names></name>; <name><surname>Wiedmann</surname><given-names>M</given-names></name>
<article-title>Comparison of Resampling Algorithms to Address Class Imbalance When Developing Machine Learning Models to Predict Foodborne Pathogen Presence in Agricultural Water</article-title>. <source>Front. Environ. Sci</source>
<year>2021</year>, <volume>9</volume>, <comment>No. 701288.</comment></mixed-citation></ref><ref id="R70"><label>(70)</label><mixed-citation publication-type="journal"><name><surname>Kim</surname><given-names>M</given-names></name>; <name><surname>Hwang</surname><given-names>K-B</given-names></name>
<article-title>An Empirical Evaluation of Sampling Methods for the Classification of Imbalanced Data</article-title>. <source>PLoS One</source>
<year>2022</year>, <volume>17</volume> (<issue>7</issue>), <comment>No. e0271260.</comment></mixed-citation></ref><ref id="R71"><label>(71)</label><mixed-citation publication-type="journal"><name><surname>Van Den Goorbergh</surname><given-names>R</given-names></name>; <name><surname>Van Smeden</surname><given-names>M</given-names></name>; <name><surname>Timmerman</surname><given-names>D</given-names></name>; <name><surname>Van Calster</surname><given-names>B</given-names></name>
<article-title>The Harm of Class Imbalance Corrections for Risk Prediction Models: Illustration and Simulation Using Logistic Regression</article-title>. <source>J. Am. Med. Inform. Assoc</source>
<year>2022</year>, <volume>29</volume> (<issue>9</issue>), <fpage>1525</fpage>&#x02013;<lpage>1534</lpage>.<pub-id pub-id-type="pmid">35686364</pub-id>
</mixed-citation></ref><ref id="R72"><label>(72)</label><mixed-citation publication-type="journal"><name><surname>Piccininni</surname><given-names>M</given-names></name>; <name><surname>Wechsung</surname><given-names>M</given-names></name>; <name><surname>Van Calster</surname><given-names>B</given-names></name>; <name><surname>Rohmann</surname><given-names>JL</given-names></name>; <name><surname>Konigorski</surname><given-names>S</given-names></name>; <name><surname>Van Smeden</surname><given-names>M</given-names></name>
<article-title>Understanding Random Resampling Techniques for Class Imbalance Correction and Their Consequences on Calibration and Discrimination of Clinical Risk Prediction Models</article-title>. <source>J. Biomed. Inf</source>
<year>2024</year>, <volume>155</volume>, <comment>No. 104666.</comment></mixed-citation></ref><ref id="R73"><label>(73)</label><mixed-citation publication-type="journal"><name><surname>Strobl</surname><given-names>C</given-names></name>; <name><surname>Boulesteix</surname><given-names>A-L</given-names></name>; <name><surname>Kneib</surname><given-names>T</given-names></name>; <name><surname>Augustin</surname><given-names>T</given-names></name>; <name><surname>Zeileis</surname><given-names>A</given-names></name>
<article-title>Conditional Variable Importance for Random Forests</article-title>. <source>BMC Bioinf</source>
<year>2008</year>, <volume>9</volume> (<issue>1</issue>), <fpage>307</fpage>.</mixed-citation></ref><ref id="R74"><label>(74)</label><mixed-citation publication-type="journal"><name><surname>Janitza</surname><given-names>S</given-names></name>; <name><surname>Strobl</surname><given-names>C</given-names></name>; <name><surname>Boulesteix</surname><given-names>A-L</given-names></name>
<article-title>An AUC-Based Permutation Variable Importance Measure for Random Forests</article-title>. <source>BMC Bioinf</source>
<year>2013</year>, <volume>14</volume> (<issue>1</issue>), <fpage>119</fpage>.</mixed-citation></ref><ref id="R75"><label>(75)</label><mixed-citation publication-type="journal"><name><surname>Robin</surname><given-names>X</given-names></name>; <name><surname>Turck</surname><given-names>N</given-names></name>; <name><surname>Hainard</surname><given-names>A</given-names></name>; <name><surname>Tiberti</surname><given-names>N</given-names></name>; <name><surname>Lisacek</surname><given-names>F</given-names></name>; <name><surname>Sanchez</surname><given-names>J-C</given-names></name>; <name><surname>M&#x000fc;ller</surname><given-names>M</given-names></name>
<article-title>pROC: An Open-Source Package for R and S+ to Analyze and Compare ROC Curves</article-title>. <source>BMC Bioinf</source>
<year>2011</year>, <volume>12</volume> (<issue>1</issue>), <fpage>77</fpage>.</mixed-citation></ref><ref id="R76"><label>(76)</label><mixed-citation publication-type="journal"><name><surname>Steyerberg</surname><given-names>EW</given-names></name>; <name><surname>Vickers</surname><given-names>AJ</given-names></name>; <name><surname>Cook</surname><given-names>NR</given-names></name>; <name><surname>Gerds</surname><given-names>T</given-names></name>; <name><surname>Gonen</surname><given-names>M</given-names></name>; <name><surname>Obuchowski</surname><given-names>N</given-names></name>; <name><surname>Pencina</surname><given-names>MJ</given-names></name>; <name><surname>Kattan</surname><given-names>MW</given-names></name>
<article-title>Assessing the Performance of Prediction Models: A Framework for Traditional and Novel Measures</article-title>. <source>Epidemiology</source>
<year>2010</year>, <volume>21</volume> (<issue>1</issue>), <fpage>128</fpage>&#x02013;<lpage>138</lpage>.<pub-id pub-id-type="pmid">20010215</pub-id>
</mixed-citation></ref><ref id="R77"><label>(77)</label><mixed-citation publication-type="journal"><name><surname>Fawcett</surname><given-names>T</given-names></name>
<article-title>An Introduction to ROC Analysis</article-title>. <source>Pattern Recognit. Lett</source>
<year>2006</year>, <volume>27</volume> (<issue>8</issue>), <fpage>861</fpage>&#x02013;<lpage>874</lpage>.</mixed-citation></ref><ref id="R78"><label>(78)</label><mixed-citation publication-type="journal"><name><surname>Mandrekar</surname><given-names>JN</given-names></name>
<article-title>Receiver Operating Characteristic Curve in Diagnostic Test Assessment</article-title>. <source>J. Thorac. Oncol</source>
<year>2010</year>, <volume>5</volume> (<issue>9</issue>), <fpage>1315</fpage>&#x02013;<lpage>1316</lpage>.<pub-id pub-id-type="pmid">20736804</pub-id>
</mixed-citation></ref><ref id="R79"><label>(79)</label><mixed-citation publication-type="journal"><name><surname>Youden</surname><given-names>WJ</given-names></name>
<article-title>Index for Rating Diagnostic Tests</article-title>. <source>Cancer</source>
<year>1950</year>, <volume>3</volume> (<issue>1</issue>), <fpage>32</fpage>&#x02013;<lpage>35</lpage>.<pub-id pub-id-type="pmid">15405679</pub-id>
</mixed-citation></ref><ref id="R80"><label>(80)</label><mixed-citation publication-type="book"><collab>Natural Resources Conservation Service</collab>. <source>Web Soil Survey</source>; <publisher-name>U.S. Department of Agriculture</publisher-name>, <year>2023</year>. <comment><ext-link xlink:href="http://websoilsurvey.sc.egov.usda.gov/" ext-link-type="uri">http://websoilsurvey.sc.egov.usda.gov/</ext-link>.</comment></mixed-citation></ref><ref id="R81"><label>(81)</label><mixed-citation publication-type="journal"><name><surname>Silverman</surname><given-names>AI</given-names></name>; <name><surname>Akrong</surname><given-names>MO</given-names></name>; <name><surname>Amoah</surname><given-names>P</given-names></name>; <name><surname>Drechsel</surname><given-names>P</given-names></name>; <name><surname>Nelson</surname><given-names>KL</given-names></name>
<article-title>Quantification of Human Norovirus GII, Human Adenovirus, and Fecal Indicator Organisms in Wastewater Used for Irrigation in Accra, Ghana</article-title>. <source>J. Water Health</source>
<year>2013</year>, <volume>11</volume> (<issue>3</issue>), <fpage>473</fpage>&#x02013;<lpage>488</lpage>.<pub-id pub-id-type="pmid">23981876</pub-id>
</mixed-citation></ref><ref id="R82"><label>(82)</label><mixed-citation publication-type="journal"><name><surname>Ravaliya</surname><given-names>K</given-names></name>; <name><surname>Gentry-Shields</surname><given-names>J</given-names></name>; <name><surname>Garcia</surname><given-names>S</given-names></name>; <name><surname>Heredia</surname><given-names>N</given-names></name>; <name><surname>Fabiszewski De Aceituno</surname><given-names>A</given-names></name>; <name><surname>Bartz</surname><given-names>FE</given-names></name>; <name><surname>Leon</surname><given-names>JS</given-names></name>; <name><surname>Jaykus</surname><given-names>L-A</given-names></name>
<article-title>Use of Bacteroidales Microbial Source Tracking To Monitor Fecal Contamination in Fresh Produce Production</article-title>. <source>Appl. Environ. Microbiol</source>
<year>2014</year>, <volume>80</volume> (<issue>2</issue>), <fpage>612</fpage>&#x02013;<lpage>617</lpage>.<pub-id pub-id-type="pmid">24212583</pub-id>
</mixed-citation></ref><ref id="R83"><label>(83)</label><mixed-citation publication-type="journal"><name><surname>Ahmed</surname><given-names>W</given-names></name>; <name><surname>Payyappat</surname><given-names>S</given-names></name>; <name><surname>Cassidy</surname><given-names>M</given-names></name>; <name><surname>Harrison</surname><given-names>N</given-names></name>; <name><surname>Besley</surname><given-names>C</given-names></name>
<article-title>Sewage-Associated Marker Genes Illustrate the Impact of Wet Weather Overflows and Dry Weather Leakage in Urban Estuarine Waters of Sydney, Australia</article-title>. <source>Sci. Total Environ</source>
<year>2020</year>, <volume>705</volume>, <comment>No. 135390.</comment></mixed-citation></ref><ref id="R84"><label>(84)</label><mixed-citation publication-type="journal"><name><surname>Schiff</surname><given-names>K</given-names></name>; <name><surname>Griffith</surname><given-names>J</given-names></name>; <name><surname>Steele</surname><given-names>J</given-names></name>; <name><surname>Zimmer-Faust</surname><given-names>A</given-names></name>
<article-title>Dry and Wet Weather Survey for Human Fecal Sources in the San Diego River Watershed</article-title>. <source>Water</source>
<year>2023</year>, <volume>15</volume> (<issue>12</issue>), <fpage>2239</fpage>.</mixed-citation></ref><ref id="R85"><label>(85)</label><mixed-citation publication-type="journal"><name><surname>Murphy</surname><given-names>HM</given-names></name>; <name><surname>McGinnis</surname><given-names>S</given-names></name>; <name><surname>Blunt</surname><given-names>R</given-names></name>; <name><surname>Stokdyk</surname><given-names>J</given-names></name>; <name><surname>Wu</surname><given-names>J</given-names></name>; <name><surname>Cagle</surname><given-names>A</given-names></name>; <name><surname>Denno</surname><given-names>DM</given-names></name>; <name><surname>Spencer</surname><given-names>S</given-names></name>; <name><surname>Firnstahl</surname><given-names>A</given-names></name>; <name><surname>Borchardt</surname><given-names>MA</given-names></name>
<article-title>Septic Systems and Rainfall Influence Human Fecal Marker and Indicator Organism Occurrence in Private Wells in Southeastern Pennsylvania</article-title>. <source>Environ. Sci. Technol</source>
<year>2020</year>, <volume>54</volume> (<issue>6</issue>), <fpage>3159</fpage>&#x02013;<lpage>3168</lpage>.<pub-id pub-id-type="pmid">32073835</pub-id>
</mixed-citation></ref><ref id="R86"><label>(86)</label><mixed-citation publication-type="journal"><name><surname>Stea</surname><given-names>EC</given-names></name>; <name><surname>Truelstrup Hansen</surname><given-names>L</given-names></name>; <name><surname>Jamieson</surname><given-names>RC</given-names></name>; <name><surname>Yost</surname><given-names>CK</given-names></name>
<article-title>Fecal Contamination in the Surface Waters of a Rural- and an Urban-Source Watershed</article-title>. <source>J. Environ. Qual</source>
<year>2015</year>, <volume>44</volume> (<issue>5</issue>), <fpage>1556</fpage>&#x02013;<lpage>1567</lpage>.<pub-id pub-id-type="pmid">26436273</pub-id>
</mixed-citation></ref><ref id="R87"><label>(87)</label><mixed-citation publication-type="journal"><name><surname>Thoe</surname><given-names>W</given-names></name>; <name><surname>Gold</surname><given-names>M</given-names></name>; <name><surname>Griesbach</surname><given-names>A</given-names></name>; <name><surname>Grimmer</surname><given-names>M</given-names></name>; <name><surname>Taggart</surname><given-names>ML</given-names></name>; <name><surname>Boehm</surname><given-names>AB</given-names></name>
<article-title>Predicting Water Quality at Santa Monica Beach: Evaluation of Five Different Models for Public Notification of Unsafe Swimming Conditions</article-title>. <source>Water Res</source>
<year>2014</year>, <volume>67</volume>, <fpage>105</fpage>&#x02013;<lpage>117</lpage>.<pub-id pub-id-type="pmid">25262555</pub-id>
</mixed-citation></ref><ref id="R88"><label>(88)</label><mixed-citation publication-type="journal"><name><surname>Whitman</surname><given-names>RL</given-names></name>; <name><surname>Nevers</surname><given-names>MB</given-names></name>; <name><surname>Korinek</surname><given-names>GC</given-names></name>; <name><surname>Byappanahalli</surname><given-names>MN</given-names></name>
<article-title>Solar and Temporal Effects on <italic toggle="yes">Escherichia coli</italic> Concentration at a Lake Michigan Swimming Beach</article-title>. <source>Appl. Environ. Microbiol</source>
<year>2004</year>, <volume>70</volume> (<issue>7</issue>), <fpage>4276</fpage>&#x02013;<lpage>4285</lpage>.<pub-id pub-id-type="pmid">15240311</pub-id>
</mixed-citation></ref><ref id="R89"><label>(89)</label><mixed-citation publication-type="journal"><name><surname>Heasley</surname><given-names>C</given-names></name>; <name><surname>Sanchez</surname><given-names>JJ</given-names></name>; <name><surname>Tustin</surname><given-names>J</given-names></name>; <name><surname>Young</surname><given-names>I</given-names></name>
<article-title>Systematic Review of Predictive Models of Microbial Water Quality at Freshwater Recreational Beaches</article-title>. <source>PLoS One</source>
<year>2021</year>, <volume>16</volume> (<issue>8</issue>), <comment>No. e0256785.</comment></mixed-citation></ref><ref id="R90"><label>(90)</label><mixed-citation publication-type="journal"><name><surname>Walters</surname><given-names>E</given-names></name>; <name><surname>Graml</surname><given-names>M</given-names></name>; <name><surname>Behle</surname><given-names>C</given-names></name>; <name><surname>M&#x000fc;ller</surname><given-names>E</given-names></name>; <name><surname>Horn</surname><given-names>H</given-names></name>
<article-title>Influence of Particle Association and Suspended Solids on UV Inactivation of Fecal Indicator Bacteria in an Urban River</article-title>. <source>Water, Air, Soil Pollut</source>
<year>2014</year>, <volume>225</volume> (<issue>1</issue>), <fpage>1822</fpage>.</mixed-citation></ref><ref id="R91"><label>(91)</label><mixed-citation publication-type="journal"><name><surname>Cole</surname><given-names>JJ</given-names></name>
<article-title>Interactions Between Bacteria and Algae in Aquatic Ecosystems</article-title>. <source>Annu. Rev. Ecol. Syst</source>
<year>1982</year>, <volume>13</volume> (<issue>1</issue>), <fpage>291</fpage>&#x02013;<lpage>314</lpage>.</mixed-citation></ref><ref id="R92"><label>(92)</label><mixed-citation publication-type="journal"><name><surname>Luo</surname><given-names>Z</given-names></name>; <name><surname>Gu</surname><given-names>G</given-names></name>; <name><surname>Ginn</surname><given-names>A</given-names></name>; <name><surname>Giurcanu</surname><given-names>MC</given-names></name>; <name><surname>Adams</surname><given-names>P</given-names></name>; <name><surname>Vellidis</surname><given-names>G</given-names></name>; <name><surname>Van Bruggen</surname><given-names>AHC</given-names></name>; <name><surname>Danyluk</surname><given-names>MD</given-names></name>; <name><surname>Wright</surname><given-names>AC</given-names></name>
<article-title>Distribution and Characterization of <italic toggle="yes">Salmonella enterica</italic> Isolates from Irrigation Ponds in the Southeastern United States</article-title>. <source>Appl. Environ. Microbiol</source>
<year>2015</year>, <volume>81</volume> (<issue>13</issue>), <fpage>4376</fpage>&#x02013;<lpage>4387</lpage>.<pub-id pub-id-type="pmid">25911476</pub-id>
</mixed-citation></ref><ref id="R93"><label>(93)</label><mixed-citation publication-type="journal"><name><surname>Gu</surname><given-names>G</given-names></name>; <name><surname>Luo</surname><given-names>Z</given-names></name>; <name><surname>Cevallos-Cevallos</surname><given-names>JM</given-names></name>; <name><surname>Adams</surname><given-names>P</given-names></name>; <name><surname>Vellidis</surname><given-names>G</given-names></name>; <name><surname>Wright</surname><given-names>A</given-names></name>; <name><surname>Van Bruggen</surname><given-names>AHC</given-names></name>
<article-title>Factors Affecting the Occurrence of <italic toggle="yes">Escherichia coli</italic> O157 Contamination in Irrigation Ponds on Produce Farms in the Suwannee River Watershed</article-title>. <source>Can. J. Microbiol</source>
<year>2013</year>, <volume>59</volume> (<issue>3</issue>), <fpage>175</fpage>&#x02013;<lpage>182</lpage>.<pub-id pub-id-type="pmid">23540335</pub-id>
</mixed-citation></ref><ref id="R94"><label>(94)</label><mixed-citation publication-type="journal"><name><surname>Gu</surname><given-names>G</given-names></name>; <name><surname>Luo</surname><given-names>Z</given-names></name>; <name><surname>Cevallos-Cevallos</surname><given-names>JM</given-names></name>; <name><surname>Adams</surname><given-names>P</given-names></name>; <name><surname>Vellidis</surname><given-names>G</given-names></name>; <name><surname>Wright</surname><given-names>A</given-names></name>; <name><surname>Van Bruggen</surname><given-names>AHC</given-names></name>
<article-title>Occurrence and Population Density of <italic toggle="yes">Campylobacter jejuni</italic> in Irrigation Ponds on Produce Farms in the Suwannee River Watershed</article-title>. <source>Can. J. Microbiol</source>
<year>2013</year>, <volume>59</volume> (<issue>5</issue>), <fpage>339</fpage>&#x02013;<lpage>346</lpage>.<pub-id pub-id-type="pmid">23647347</pub-id>
</mixed-citation></ref><ref id="R95"><label>(95)</label><mixed-citation publication-type="journal"><name><surname>Li</surname><given-names>B</given-names></name>; <name><surname>Vellidis</surname><given-names>G</given-names></name>; <name><surname>Liu</surname><given-names>H</given-names></name>; <name><surname>Jay-Russell</surname><given-names>M</given-names></name>; <name><surname>Zhao</surname><given-names>S</given-names></name>; <name><surname>Hu</surname><given-names>Z</given-names></name>; <name><surname>Wright</surname><given-names>A</given-names></name>; <name><surname>Elkins</surname><given-names>CA</given-names></name>
<article-title>Diversity and Antimicrobial Resistance of <italic toggle="yes">Salmonella enterica</italic> Isolates from Surface Water in Southeastern United States</article-title>. <source>Appl. Environ. Microbiol</source>
<year>2014</year>, <volume>80</volume> (<issue>20</issue>), <fpage>6355</fpage>&#x02013;<lpage>6365</lpage>.<pub-id pub-id-type="pmid">25107969</pub-id>
</mixed-citation></ref><ref id="R96"><label>(96)</label><mixed-citation publication-type="journal"><name><surname>Polat</surname><given-names>H</given-names></name>; <name><surname>Topalcengiz</surname><given-names>Z</given-names></name>; <name><surname>Danyluk</surname><given-names>MD</given-names></name>
<article-title>Prediction of <italic toggle="yes">Salmonella</italic> Presence and Absence in Agricultural Surface Waters by Artificial Intelligence Approaches</article-title>. <source>J. Food Saf</source>
<year>2020</year>, <volume>40</volume> (<issue>1</issue>), <comment>No. e12733.</comment></mixed-citation></ref><ref id="R97"><label>(97)</label><mixed-citation publication-type="journal"><name><surname>Brooks</surname><given-names>W</given-names></name>; <name><surname>Corsi</surname><given-names>S</given-names></name>; <name><surname>Fienen</surname><given-names>M</given-names></name>; <name><surname>Carvin</surname><given-names>R</given-names></name>
<article-title>Predicting Recreational Water Quality Advisories: A Comparison of Statistical Methods</article-title>. <source>Environ. Model. Softw</source>
<year>2016</year>, <volume>76</volume>, <fpage>81</fpage>&#x02013;<lpage>94</lpage>.</mixed-citation></ref><ref id="R98"><label>(98)</label><mixed-citation publication-type="journal"><name><surname>Christodoulou</surname><given-names>E</given-names></name>; <name><surname>Ma</surname><given-names>J</given-names></name>; <name><surname>Collins</surname><given-names>GS</given-names></name>; <name><surname>Steyerberg</surname><given-names>EW</given-names></name>; <name><surname>Verbakel</surname><given-names>JY</given-names></name>; <name><surname>Van Calster</surname><given-names>B</given-names></name>
<article-title>A Systematic Review Shows No Performance Benefit of Machine Learning over Logistic Regression for Clinical Prediction Models</article-title>. <source>J. Clin. Epidemiol</source>
<year>2019</year>, <volume>110</volume>, <fpage>12</fpage>&#x02013;<lpage>22</lpage>.<pub-id pub-id-type="pmid">30763612</pub-id>
</mixed-citation></ref><ref id="R99"><label>(99)</label><mixed-citation publication-type="journal"><name><surname>Ahmed</surname><given-names>W</given-names></name>; <name><surname>Payyappat</surname><given-names>S</given-names></name>; <name><surname>Cassidy</surname><given-names>M</given-names></name>; <name><surname>Harrison</surname><given-names>N</given-names></name>; <name><surname>Besley</surname><given-names>C</given-names></name>
<article-title>Microbial Source Tracking of Untreated Human Wastewater and Animal Scats in Urbanized Estuarine Waters</article-title>. <source>Sci. Total Environ</source>
<year>2023</year>, <volume>877</volume>, <comment>No. 162764.</comment></mixed-citation></ref><ref id="R100"><label>(100)</label><mixed-citation publication-type="journal"><name><surname>Holcomb</surname><given-names>DA</given-names></name>; <name><surname>Knee</surname><given-names>J</given-names></name>; <name><surname>Capone</surname><given-names>D</given-names></name>; <name><surname>Sumner</surname><given-names>T</given-names></name>; <name><surname>Adriano</surname><given-names>Z</given-names></name>; <name><surname>Nal&#x000e1;</surname><given-names>R</given-names></name>; <name><surname>Cumming</surname><given-names>O</given-names></name>; <name><surname>Brown</surname><given-names>J</given-names></name>; <name><surname>Stewart</surname><given-names>JR</given-names></name>
<article-title>Impacts of an Urban Sanitation Intervention on Fecal Indicators and the Prevalence of Human Fecal Contamination in Mozambique</article-title>. <source>Environ. Sci. Technol</source>
<year>2021</year>, <volume>55</volume> (<issue>17</issue>), <fpage>11667</fpage>&#x02013;<lpage>11679</lpage>.<pub-id pub-id-type="pmid">34382777</pub-id>
</mixed-citation></ref><ref id="R101"><label>(101)</label><mixed-citation publication-type="journal"><name><surname>Nshimyimana</surname><given-names>JP</given-names></name>; <name><surname>Cruz</surname><given-names>MC</given-names></name>; <name><surname>Thompson</surname><given-names>RJ</given-names></name>; <name><surname>Wuertz</surname><given-names>S</given-names></name>
<article-title>Bacteroidales Markers for Microbial Source Tracking in Southeast Asia</article-title>. <source>Water Res</source>
<year>2017</year>, <volume>118</volume>, <fpage>239</fpage>&#x02013;<lpage>248</lpage>.<pub-id pub-id-type="pmid">28433694</pub-id>
</mixed-citation></ref></ref-list></back><floats-group><fig position="float" id="F1"><label>Figure 1.</label><caption><p id="P46">Odds ratio (95% confidence interval) estimates for exposure variables in the final mixed-effects logistic regression models for the three fecal indicators, HF183 (A), human fecal indicator (HFI; HF183 and/or crAssphage) (B), and <italic toggle="yes">E. coli</italic> &#x02265; 126 MPN/100 mL (C).</p></caption><graphic xlink:href="nihms-2041601-f0002" position="float"/></fig><fig position="float" id="F2"><label>Figure 2.</label><caption><p id="P47">Conditional variable importance for each conditional random forest (CRF) model: HF183 (A), human fecal indicator (HFI; HF183 and/or crAssphage) (B), and <italic toggle="yes">E. coli</italic> &#x02265; 126 MPN/100 mL (C). The <italic toggle="yes">y</italic>-axis shows the explanatory variables ranked from most important to least important. The <italic toggle="yes">x</italic>-axis shows the variable importance on the basis of reduction in the area under the curve (AUC) by conditional permutation; higher relative variable importance indicates stronger association between the variable and the outcome. Variable importance &#x02264;0 indicates negligible association.</p></caption><graphic xlink:href="nihms-2041601-f0003" position="float"/></fig><fig position="float" id="F3"><label>Figure 3.</label><caption><p id="P48">Receiver operating characteristic (ROC) curves (black lines) for logistic regression (top row) and conditional random forest (CRF, bottom row) model predictions of HF183 (A), human fecal indicator (HFI; HF183 and/or FRNA GII coliphage) (B), and <italic toggle="yes">E. coli</italic> &#x02265; 126 MPN/100 mL (C) in the test data set (2015&#x02013;2016). The area under the curve (AUC) summarizes overall predictive performance, and the classification threshold is the predicted probability that minimizes misclassification, corresponding to the blue point on the ROC curve. 
The red dashed line represents the performance of an unskilled classifier (no discriminatory ability) with an AUC of 0.5.</p></caption><graphic xlink:href="nihms-2041601-f0004" position="float"/></fig><table-wrap position="float" id="T1" orientation="landscape"><label>Table 1.</label><caption><p id="P49">Fecal Indicator Occurrence and Building Presence by Pond in the Training (2020&#x02013;2021) and Test (2015&#x02013;2016) Data Sets</p></caption><table frame="void" rules="none"><colgroup span="1"><col align="left" valign="middle" span="1"/><col align="left" valign="middle" span="1"/><col align="left" valign="middle" span="1"/><col align="left" valign="middle" span="1"/><col align="left" valign="middle" span="1"/><col align="left" valign="middle" span="1"/><col align="left" valign="middle" span="1"/></colgroup><thead><tr><th align="left" valign="bottom" rowspan="1" colspan="1">data set</th><th align="center" valign="bottom" rowspan="1" colspan="1">pond</th><th align="center" valign="bottom" rowspan="1" colspan="1">building (Y/N)</th><th align="center" valign="bottom" rowspan="1" colspan="1">no. HF183 detection (%)</th><th align="center" valign="bottom" rowspan="1" colspan="1">no. human-associated phage<sup><xref rid="TFN1" ref-type="table-fn">a</xref></sup> detection (%)</th><th align="center" valign="bottom" rowspan="1" colspan="1">no. HF183 and phage codetection (%)</th><th align="center" valign="bottom" rowspan="1" colspan="1">no. generic <italic toggle="yes">E. 
coli</italic> &#x02265;126 MPN/100 mL (%)</th></tr></thead><tbody><tr><td align="left" valign="bottom" rowspan="1" colspan="1">training</td><td align="center" valign="bottom" rowspan="1" colspan="1">A1</td><td align="center" valign="bottom" rowspan="1" colspan="1">Y</td><td align="center" valign="bottom" rowspan="1" colspan="1">12 (44)</td><td align="center" valign="bottom" rowspan="1" colspan="1">3 (11)</td><td align="center" valign="bottom" rowspan="1" colspan="1">2 (7)</td><td align="center" valign="bottom" rowspan="1" colspan="1">2 (7)</td></tr><tr><td align="left" valign="bottom" rowspan="1" colspan="1"/><td align="center" valign="bottom" rowspan="1" colspan="1">A2</td><td align="center" valign="bottom" rowspan="1" colspan="1">Y</td><td align="center" valign="bottom" rowspan="1" colspan="1">7 (26)</td><td align="center" valign="bottom" rowspan="1" colspan="1">1 (4)</td><td align="center" valign="bottom" rowspan="1" colspan="1">1 (4)</td><td align="center" valign="bottom" rowspan="1" colspan="1">1 (4)</td></tr><tr><td align="left" valign="bottom" rowspan="1" colspan="1"/><td align="center" valign="bottom" rowspan="1" colspan="1">A3</td><td align="center" valign="bottom" rowspan="1" colspan="1">Y</td><td align="center" valign="bottom" rowspan="1" colspan="1">14 (52)</td><td align="center" valign="bottom" rowspan="1" colspan="1">6 (22)</td><td align="center" valign="bottom" rowspan="1" colspan="1">5 (19)</td><td align="center" valign="bottom" rowspan="1" colspan="1">4 (15)</td></tr><tr><td align="left" valign="bottom" rowspan="1" colspan="1"/><td align="center" valign="bottom" rowspan="1" colspan="1">A4</td><td align="center" valign="bottom" rowspan="1" colspan="1">Y</td><td align="center" valign="bottom" rowspan="1" colspan="1">26 (96)</td><td align="center" valign="bottom" rowspan="1" colspan="1">0 (0)</td><td align="center" valign="bottom" rowspan="1" colspan="1">0 (0)</td><td align="center" valign="bottom" rowspan="1" colspan="1">2 (7)</td></tr><tr><td 
align="left" valign="bottom" rowspan="1" colspan="1"/><td align="center" valign="bottom" rowspan="1" colspan="1">B1</td><td align="center" valign="bottom" rowspan="1" colspan="1">N</td><td align="center" valign="bottom" rowspan="1" colspan="1">1 (4)</td><td align="center" valign="bottom" rowspan="1" colspan="1">0 (0)</td><td align="center" valign="bottom" rowspan="1" colspan="1">0 (0)</td><td align="center" valign="bottom" rowspan="1" colspan="1">1 (4)</td></tr><tr><td align="left" valign="bottom" rowspan="1" colspan="1"/><td align="center" valign="bottom" rowspan="1" colspan="1">B2<sup><xref rid="TFN2" ref-type="table-fn">b</xref></sup></td><td align="center" valign="bottom" rowspan="1" colspan="1">N</td><td align="center" valign="bottom" rowspan="1" colspan="1">2 (7)</td><td align="center" valign="bottom" rowspan="1" colspan="1">0 (0)</td><td align="center" valign="bottom" rowspan="1" colspan="1">0 (0)</td><td align="center" valign="bottom" rowspan="1" colspan="1">2 (7)</td></tr><tr><td align="left" valign="bottom" rowspan="1" colspan="1"/><td align="center" valign="bottom" rowspan="1" colspan="1">B3</td><td align="center" valign="bottom" rowspan="1" colspan="1">N</td><td align="center" valign="bottom" rowspan="1" colspan="1">2 (7)</td><td align="center" valign="bottom" rowspan="1" colspan="1">0 (0)</td><td align="center" valign="bottom" rowspan="1" colspan="1">0 (0)</td><td align="center" valign="bottom" rowspan="1" colspan="1">4 (15)</td></tr><tr><td align="left" valign="bottom" rowspan="1" colspan="1"/><td align="center" valign="bottom" rowspan="1" colspan="1">B4</td><td align="center" valign="bottom" rowspan="1" colspan="1">Y</td><td align="center" valign="bottom" rowspan="1" colspan="1">7 (26)</td><td align="center" valign="bottom" rowspan="1" colspan="1">4 (15)</td><td align="center" valign="bottom" rowspan="1" colspan="1">2 (7)</td><td align="center" valign="bottom" rowspan="1" colspan="1">6 (22)</td></tr><tr><td align="left" valign="bottom" rowspan="1" 
colspan="1">test</td><td align="center" valign="bottom" rowspan="1" colspan="1">LV<sup><xref rid="TFN3" ref-type="table-fn">c</xref></sup></td><td align="center" valign="bottom" rowspan="1" colspan="1">Y</td><td align="center" valign="bottom" rowspan="1" colspan="1">8 (35)</td><td align="center" valign="bottom" rowspan="1" colspan="1">2 (9)</td><td align="center" valign="bottom" rowspan="1" colspan="1">1 (4)</td><td align="center" valign="bottom" rowspan="1" colspan="1">0 (0)</td></tr><tr><td align="left" valign="bottom" rowspan="1" colspan="1"/><td align="center" valign="bottom" rowspan="1" colspan="1">NP</td><td align="center" valign="bottom" rowspan="1" colspan="1">Y</td><td align="center" valign="bottom" rowspan="1" colspan="1">8 (36)</td><td align="center" valign="bottom" rowspan="1" colspan="1">2 (9)</td><td align="center" valign="bottom" rowspan="1" colspan="1">1 (5)</td><td align="center" valign="bottom" rowspan="1" colspan="1">4 (18)</td></tr><tr><td align="left" valign="bottom" rowspan="1" colspan="1"/><td align="center" valign="bottom" rowspan="1" colspan="1">SC</td><td align="center" valign="bottom" rowspan="1" colspan="1">Y</td><td align="center" valign="bottom" rowspan="1" colspan="1">7 (32)</td><td align="center" valign="bottom" rowspan="1" colspan="1">3 (14)</td><td align="center" valign="bottom" rowspan="1" colspan="1">3 (14)</td><td align="center" valign="bottom" rowspan="1" colspan="1">2 (9)</td></tr></tbody></table><table-wrap-foot><fn id="TFN1"><label>a</label><p id="P50">crAssphage was assessed in the training data set and FRNA GII coliphage was assessed in the test data set.</p></fn><fn id="TFN2"><label>b</label><p id="P51">Sample size was 27 for each training data set pond except B2, from which 28 samples were collected.</p></fn><fn id="TFN3"><label>c</label><p id="P52">Sample size was 22 for test data set ponds NP and SC and 23 for pond LV.</p></fn></table-wrap-foot></table-wrap></floats-group></article>