# Tests for BeeBDC::formattedCombiner().
#
# The script writes a small USGS-formatted occurrence file and its matching
# attribute file to tempdir(), builds an in-memory table of existing
# occurrences plus an EML template, runs formattedCombiner() over them, and
# then checks the type and size of what comes back.

requireNamespace("readr")
requireNamespace("emld")
requireNamespace("dplyr")

# Be sure that the testData is not already in tempdir
testDataPath <- list.files(
  tempdir(),
  full.names = TRUE,
  pattern = "USGS_formatted_|USGS_attribute_",
  recursive = TRUE
)
unlink(testDataPath)

# Create a test USGS dataset and save it to a temporary location
USGS_testData <- dplyr::tribble(
  ~scientificName, ~identifiedBy, ~dateIdentified, ~decimalLatitude,
  ~decimalLongitude, ~stateProvince, ~country, ~county, ~municipality,
  ~eventDate, ~eventTime, ~recordedBy, ~eventID, ~Location,
  ~samplingProtocol, ~samplingEffort, ~coordinateUncertaintyInMeters,
  ~datasetID, ~institutionCode, ~datasetName, ~occurrenceID, ~recordId,
  ~fieldNotes, ~sex, ~id, ~dataSource,
  # Row 1
  "Lasioglossum imitatum", "Sam Droege", "1/1/2005", 35.5917, -83.0602,
  "North Carolina", "USA", "Swain", "Great Smoky Mountains",
  "2002-04-02T10:30:00Z", 2.00811e+13, "Harold W. Ikerd", "USGS_DRO337",
  "Purchase Knob",
  "Technique used: pan trap|Bowls full upon collection: 5|Sampling Bowl/trap size: bowl 6.0oz|Trap colour: yellow-uv|Trap liquid: soap dawn",
  NA, 3L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO000001",
  "USGS_DRO000001",
  "field_note: Field no blooms|DateEntered: 1/27/2017 9:35:36|DateScanned: 1/1/1900|ipAddress: 134.192.250.22|position: 23|time1: 2002-04-02 10:30:00|time2: 2002-04-02 16:30:00",
  "f", "USGS_DRO000001", "USGS_data",
  # Row 2
  NA, NA, NA, 35.5917, -83.0602,
  "North Carolina", "USA", "Swain", "Great Smoky Mountains",
  "2002-04-02T12:00:00Z", 2.00811e+13, "Harold W. Ikerd", "USGS_DRO339",
  "Purchase Knob",
  "Technique used: pan trap|Bowls full upon collection: 5|Sampling Bowl/trap size: bowl 12.0oz|Trap colour: pale blue|Trap liquid: soap dawn",
  NA, 3L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO000013",
  "USGS_DRO000013",
  "field_note: edge FX ;on road|ipAddress: 134.192.250.22|position: 23|time1: 2002-04-02 12:00:00|time2: 2002-04-02 17:30:00",
  NA, "USGS_DRO000013", "USGS_data",
  # Row 3
  "Andrena rugosa", "John Ascher", "10/1/200", 35.6114, -83.5429,
  "Tennessee", "USA", "Blount", "Great Smoky Mountains",
  "2002-04-01T09:00:00Z", 2.00811e+13, "Harold W. Ikerd", "USGS_DRO283",
  "Goshen Prog",
  "Technique used: pan trap|Bowls full upon collection: 5|Sampling Bowl/trap size: bowl 6.0oz|Trap colour: yellow-uv|Trap liquid: soap dawn",
  NA, 4L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO000376",
  "USGS_DRO000376",
  "field_note: Phacelia fimbriata;Claytonia caroliniana;Erythronium americanum;Violoa rotundifolia; V. hastata; V.sororia; V.pedata;Violoa rotundifolia|DateEntered: 1/27/2017 9:35:36|DateScanned: 12/27/20|ipAddress: 172.130.3.67|position: 00|time1: 2002-04-01 09:00:00|time2: 2002-04-01 15:30:00",
  "f", "USGS_DRO000376", "USGS_data",
  # Row 4
  "Plenoculus davisi_atlanticus", "Matthias Buck", "1/1/2009", 39.0507167,
  -76.7778167, "Maryland", "USA", "Anne Arundel",
  "Patuxent Wildlife Research Refuge", "2002-05-24T12:00:00Z", 2.00811e+13,
  "Harold W. Ikerd", "USGS_DRO309", NA,
  "Technique used: pan trap|Bowls full upon collection: 5|Sampling Bowl/trap size: bowl 12.0oz|Trap colour: pale blue-uv|Trap liquid: soap dawn",
  NA, 4L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO001232",
  "USGS_DRO001232",
  "field_note: sandy area under powerline on north track|DateEntered: 1/27/2017 9:35:36|DateScanned: 4/21/200|ipAddress: 172.130.3.67|position: 04|time1: 2002-05-24 12:00:00|time2: 2002-05-25 11:00:00",
  "m", "USGS_DRO001232", "USGS_data",
  # Row 5
  "Chrysura kyrae/pacifica", "Erika Tucker", "12/6/200", 39.0457, -76.7896,
  "Maryland", "USA", "Prince George's", "Patuxent Wildlife Research Refuge",
  "2002-04-30T09:20:00Z", 2.0081e+13, "Sam Droege", "USGS_DRO70", NA,
  "Technique used: pan trap|Bowls full upon collection: 20|Sampling Bowl/trap size: bowl 12.0oz|Trap colour: white|Trap liquid: in field note",
  NA, 4L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO002016",
  "USGS_DRO002016",
  "field_note: Blooming-a few sping beauties, mertensia, garlic mustard, not much else;habitat-bottomland;other-detergent test;other-sunny/wind 4mph/60's;test failed 4x5=20 bowls put out but sharpie permanent marker failed.|DateEntered: 1/27/2017 9:35:36|DateScanned: 12/9/200|ipAddress: 159.189.28.40|position: 04|time1: 2002-04-30 09:20:00|time2: 2002-04-30 00:00:00",
  "u", "USGS_DRO002016", "USGS_data",
  # Row 6
  "Melissodes subillatus", "Karen Wright", "7/1/2016", 38.909, -76.683,
  "Maryland", "USA", "Prince George's", NA,
  "2002-06-18T00:00:00Z", 2.00811e+13, NA, "USGS_DRO376", NA,
  "Technique used: pan trap|Bowls full upon collection: 5|Sampling Bowl/trap size: bowl 12.0oz|Trap colour: blue|Trap liquid: soap dawn",
  NA, 3L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO002363",
  "USGS_DRO002363",
  "field_note: student conservation folks|DateEntered: 1/27/2017 9:35:36|DateScanned: 7/12/201|ipAddress: 134.192.250.22|position: 04|time1: 2002-06-18 00:00:00|time2: 2002-06-18 00:00:00",
  "f", "USGS_DRO002363", "USGS_data",
  # Row 7
  "Augochloropsis metallica_metallica", "Michael Orr", "3/8/2011", 38.909,
  -76.683, "Maryland", "USA", "Prince George's", NA,
  "2002-06-18T00:00:00Z", 2.00811e+13, NA, "USGS_DRO377", NA,
  "Technique used: pan trap|Bowls full upon collection: 5|Sampling Bowl/trap size: bowl 12.0oz|Trap colour: white|Trap liquid: soap dawn",
  NA, 3L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO002395",
  "USGS_DRO002395",
  "field_note: student conservation folks|DateEntered: 1/27/2017 9:35:36|DateScanned: 3/8/2011|ipAddress: 134.192.250.22|position: 04|time1: 2002-06-18 00:00:00|time2: 2002-06-18 00:00:00",
  "f", "USGS_DRO002395", "USGS_data",
  # Row 8
  "Osmia marilaunidii", "Harold Ikerd", "5/31/201", 33.3404, -97.1366,
  "Texas", "USA", "Denton", "RVAC",
  "2002-04-27T00:00:00Z", 2.01605e+13, "Harold W. Ikerd", "USGS_DRO480", NA,
  "Technique used: pan trap|Bowls full upon collection: 5|Sampling Bowl/trap size: bowl 12.0oz|Trap colour: pale blue",
  NA, 4L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO004865",
  "USGS_DRO004865",
  "DateEntered: 1/27/2017 9:35:36|DateScanned: 5/31/201|ipAddress: 159.189.28.152|position: 08|time1: 2002-04-27 00:00:00|time2: 2002-04-27 00:00:00",
  "f", "USGS_DRO004865", "USGS_data",
  # Row 9
  "Lasioglossum droegei", "Jason Gibbs", "12/9/201", 29.1361, -103.1786,
  "Texas", "USA", "Brewster", "Big Bend National Park",
  "2002-05-01T06:00:00Z", 2.00811e+13, "Harold W. Ikerd", "USGS_DRO856", NA,
  "Technique used: pan trap|Bowls full upon collection: 5|Sampling Bowl/trap size: bowl 12.0oz|Trap colour: blue|Trap liquid: soap dawn",
  NA, 4L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO010505",
  "USGS_DRO010505",
  "DateEntered: 1/27/2017 9:35:36|DateScanned: 12/13/20|ipAddress: 134.192.250.8|position: 56|time1: 2002-05-01 06:00:00|time2: 2002-05-02 12:00:00",
  "f", "USGS_DRO010505", "USGS_data",
  # Row 10
  "Osmia atriventris", "Molly Rightmyer", "10/19/20", 38.6916, -77.0573,
  "Maryland", "USA", "Prince George's", "Piscataway",
  "2003-04-02T16:40:00Z", 2.00811e+13, "Sam Droege", "USGS_DRO954", "25",
  "Technique used: pan trap|Bowls full upon collection: 30|Sampling Bowl/trap size: bowl 3.25oz|Trap liquid: soap dawn",
  NA, 4L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO016225",
  "USGS_DRO016225",
  "field_note: 15 w 15 y, 0 missing, irregularly mown field|note: Transect 25|DateEntered: 1/27/2017 9:35:36|DateScanned: 10/20/20|ipAddress: 134.192.250.8|position: 04|time1: 2003-04-02 16:40:00|time2: 2003-04-03 17:40:00",
  "m", "USGS_DRO016225", "USGS_data",
  # Row 11
  "Epeolus scutellaris", "Rebekah Andrus Nelson", "9/1/2007", 41.0704,
  -71.8582, "New York", "USA", "Suffolk", "Montauk Point State Park",
  "2005-09-06T00:00:00Z", 2.00812e+13, "Sam Droege", "USGS_DRO2169", NA,
  "Technique used: hand net",
  NA, 4L, "USGS_DRO", "USGS", "USGS_DRO database", "USGS_DRO039487",
  "USGS_DRO039487",
  "field_note: sunny in the low 80's fair amount of wind;collected around the lighthouse on the point as well as on the bluff immediately adjacent on the Camp Hero Site. Collected primarily off of goldenrod.|DateEntered: 1/27/2017 9:35:36|DateScanned: 10/9/200|ipAddress: 172.162.21.134|position: 09|time1: 2005-09-06 00:00:00|time2: 2005-09-06 00:00:00",
  "m", "USGS_DRO039487", "USGS_data"
)
# Save a temporary version of these data. The write is a separate statement
# (not piped) so the script does not depend on `%>%` being attached.
readr::write_excel_csv(
  USGS_testData,
  file.path(tempdir(), "USGS_formatted_2023-01-27.csv")
)

# Attribute (metadata) file that accompanies the USGS occurrence file
USGS_attrFile <- dplyr::tribble(
  ~dataSource, ~alternateIdentifier, ~title, ~pubDate, ~dateStamp, ~doi,
  ~downloadLink, ~abstract, ~citations, ~downloadCitation, ~rights,
  "USGS_data", "Not provided", "USGS_DRO database", "2022/11/19",
  "2022/11/19", "Not provided",
  "Not provided, contact Sam Droege at sdroege@usgs.gov", NA,
  "Citations not provided",
  "Sam Droege. (2022-11-19). United States Geological Survey bee data.",
  "Rights are not provided. Please seek permission for data use from Same Droege."
)
readr::write_excel_csv(
  USGS_attrFile,
  file.path(tempdir(), "USGS_attribute_files2023-01-27.csv")
)

# Create a test database of occurrences
existingTestdb <- dplyr::tribble(
  ~scientificName, ~family, ~subfamily, ~genus, ~subgenus, ~subspecies,
  ~species, ~specificEpithet, ~infraspecificEpithet, ~acceptedNameUsage,
  ~taxonRank, ~scientificNameAuthorship, ~identificationQualifier,
  ~higherClassification, ~identificationReferences, ~typeStatus,
  ~previousIdentifications, ~verbatimIdentification, ~identifiedBy,
  ~dateIdentified, ~decimalLatitude, ~decimalLongitude, ~stateProvince,
  ~country, ~continent, ~locality, ~island, ~county, ~municipality,
  ~countryCode, ~level0Gid, ~level0Name, ~level1Gid, ~level1Name, ~license,
  ~issue, ~eventDate, ~eventTime, ~day, ~month, ~year, ~basisOfRecord,
  ~type, ~occurrenceStatus, ~recordNumber, ~recordedBy, ~eventID,
  ~samplingProtocol, ~samplingEffort, ~individualCount, ~organismQuantity,
  ~coordinatePrecision, ~coordinateUncertaintyInMeters, ~spatiallyValid,
  ~catalogNumber, ~gbifID, ~datasetID, ~institutionCode, ~datasetName,
  ~otherCatalogNumbers, ~occurrenceID, ~taxonKey, ~coreid, ~recordId,
  ~collectionID, ~verbatimScientificName, ~verbatimEventDate,
  ~associatedTaxa, ~associatedOrganisms, ~fieldNotes, ~sex, ~rights,
  ~rightsHolder, ~accessRights, ~associatedReferences,
  ~bibliographicCitation, ~references, ~informationWithheld,
  ~isDuplicateOf, ~hasCoordinate, ~hasGeospatialIssues, ~assertions,
  ~occurrenceYear, ~id, ~duplicateStatus, ~associatedOccurrences,
  ~locationRemarks, ~dataSource,
  # Row 1 (ALA_Apiformes)
  "APIDAE", "Apidae", NA, NA, NA, NA, NA, "planifrons", NA,
  "Platytrigona planifrons (Smith, 1865)", "family",
  NA, NA, NA, NA, NA, NA, NA,
  "Rasmussen, C.", "2007", -7.2, 146.65, "Morobe Province",
  "Papua New Guinea", NA, NA, NA, NA, NA, "PG", NA, NA, NA, NA,
  "CC-BY 3.0 (Au)", NA, NA, NA, NA, 6L, 1950L, "PRESERVED_SPECIMEN", NA,
  "PRESENT", NA, "Faddy, H.", "urn:australianmuseum.net.au:Events:1093889",
  NA, NA, NA, NA, 0.001, 10000L, TRUE, "K.240452", NA, NA, "AM", NA,
  "ecatalogue.irn:1212006 | urn:catalog:AM:Entomology:K.240452",
  "urn:lsid:ozcam.taxonomy.org.au:AM:Entomology:K.240452",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "GEODETIC_DATUM_ASSUMED_WGS84 | MISSING_GEODETICDATUM | MISSING_GEOREFERENCEDBY | MISSING_GEOREFERENCEPROTOCOL | MISSING_GEOREFERENCESOURCES | MISSING_GEOREFERENCEVERIFICATIONSTATUS | MISSING_GEOREFERENCE_DATE | MISSING_TAXONRANK | TAXON_MATCH_HIGHERRANK",
  "1949-12-31T14:00:00Z", "0a3c9ba7-c666-4a42-8e93-f60789977722", NA, NA,
  "ecatalogue.LocCollectionEventLocal: \"Papua New Guinea, Morobe Province, Bulolo (7° 12' S, 146° 39' E) /06/1950 - /06/1950, Faddy, H.(Collector)\";",
  "ALA_Apiformes",
  # Row 2 (GBIF_Apidae)
  "Bombus Latreille, 1802", "Apidae", NA, "Bombus",
  NA, NA, NA, NA, NA, NA,
  "GENUS",
  NA, NA, NA, NA, NA, NA, NA, NA, NA,
  44.99712, 1.00448,
  NA, NA, NA, NA, NA,
  "Dordogne", "Fleurac", "FR", "FRA", "France", "FRA.10_1",
  "Nouvelle-Aquitaine", "CC_BY_4_0", "RECORDED_DATE_INVALID",
  NA, NA, NA, NA, NA,
  "HUMAN_OBSERVATION", NA, "PRESENT", NA, "(SPIPOLL)",
  "fcb8e5ba-2a2b-11e9-a1a8-005056010096",
  NA, NA, NA, NA, NA,
  3777L, NA, NA, NA,
  "5900C02E-AD2B-555E-E053-2614A8C02A2B", NA, NA, NA,
  "fcb8e5ba-2a2b-11e9-a1a8-005056010096", 1340278L, NA, NA, NA,
  "Bombus Latreille, 1802",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "Géographie soumise à floutage à la demande du producteur|Geographic information generalized during aggregation at the request of the producer",
  NA, TRUE, FALSE,
  NA, NA, NA, NA, NA, NA,
  "GBIF_Apidae",
  # Row 3 (GBIF_Apidae)
  "Bombus pensylvanicus (De Geer, 1773)", "Apidae", NA, "Bombus", NA, NA,
  "Bombus pensylvanicus", "pensylvanicus", NA, NA, "SPECIES",
  NA, NA, NA, NA, NA, NA, NA, NA, NA,
  33.9315, -93.592, "Arkansas", NA, NA, "3 mi N Hope", NA, "Hempstead", NA,
  "US", "USA", "United States", "USA.4_1", "Arkansas", "CC_BY_4_0",
  "GEODETIC_DATUM_ASSUMED_WGS84;INSTITUTION_MATCH_NONE",
  "1954-08-19T00:00:00", NA, 19L, 8L, 1954L, "PRESERVED_SPECIMEN", NA,
  "PRESENT",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "BOMBUS32831", 767135245L, NA, "USDA-ARS", NA, NA, "767135245", 1340416L,
  NA, NA, NA,
  "Bombus pensylvanicus",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  TRUE, FALSE,
  NA, NA, NA, NA, NA, NA,
  "GBIF_Apidae",
  # Row 4 (GBIF_Apidae)
  "Nomada subcornuta (Kirby, 1802)", "Apidae", NA, "Nomada", NA, NA,
  "Nomada subcornuta", "subcornuta", NA, NA, "SPECIES",
  NA, NA, NA, NA, NA, NA, NA,
  "Paukkunen, Juho", NA, 63.05649, 27.68866, "Sb",
  NA, NA, NA, NA,
  "Siilinjärvi", NA, "FI", "FIN", "Finland", "FIN.1_1", "Eastern Finland",
  "CC_BY_4_0",
  "GEODETIC_DATUM_ASSUMED_WGS84;COORDINATE_PRECISION_INVALID;INSTITUTION_MATCH_FUZZY;INSTITUTION_COLLECTION_MISMATCH",
  "1954-05-30T00:00:00", NA, 30L, 5L, 1954L, "PRESERVED_SPECIMEN", NA,
  "PRESENT",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "JP08-1503", 335348142L, NA, "MZH", NA, NA, NA, 7442089L,
  NA, NA, NA,
  "Nomada subcornuta",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  TRUE, FALSE,
  NA, NA, NA, NA, NA, NA,
  "GBIF_Apidae",
  # Row 5 (GBIF_Apidae)
  "Apis mellifera Linnaeus, 1758", "Apidae", NA, "Apis", NA, NA,
  "Apis mellifera", "mellifera", NA, NA, "SPECIES",
  NA, NA, NA, NA, NA, NA, NA,
  "nina fogel", "2021-09-04T15:41:50", 34.011922, -118.461783,
  "California",
  NA, NA, NA, NA, NA, NA,
  "US", "USA", "United States", "USA.5_1", "California", "CC_BY_NC_4_0",
  "COORDINATE_ROUNDED", "2021-09-03T10:41:34", "17:41:34Z", 3L, 9L, 2021L,
  "HUMAN_OBSERVATION", NA, "PRESENT", NA, "yarrow71",
  NA, NA, NA, NA, NA, NA,
  6L, NA, "93593164", NA, NA, "iNaturalist",
  "iNaturalist research-grade observations", NA,
  "https://www.inaturalist.org/observations/93593164", 1341976L,
  NA, NA, NA,
  "Apis mellifera", "Fri Sep 03 2021 10:41:34 GMT-0700 (PDT)",
  NA, NA, NA, NA, NA,
  "yarrow71", NA, NA, NA,
  "https://www.inaturalist.org/observations/93593164", NA, NA,
  TRUE, FALSE,
  NA, NA, NA, NA, NA, NA,
  "GBIF_Apidae",
  # Row 6 (GBIF_Apidae)
  "Nomada Scopoli, 1770", "Apidae", NA, "Nomada",
  NA, NA, NA, NA, NA, NA,
  "GENUS",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "Kansas", NA, "NORTH_AMERICA", "unspecified", NA, "Miami", NA, "US",
  NA, NA, NA, NA,
  "CC_BY_4_0", "COLLECTION_MATCH_FUZZY", "1951-05-27T00:00:00", NA, 27L, 5L,
  1951L, "PRESERVED_SPECIMEN", NA, "PRESENT", NA, "Beer, Robert",
  NA, NA, NA, NA, NA, NA, NA, NA,
  "1253091", 894574519L, NA, "KU", "Snow Entomological Museum Collection",
  NA, "28de669c-8e2d-46ce-bc25-36c0db48aa1b", 1343230L,
  NA, NA, NA,
  "Nomada",
  NA, NA, NA, NA, NA, NA, NA,
  "http://biodiversity.ku.edu/research/university-kansas-biodiversity-institute-data-publication-and-use-norms",
  NA, NA, NA, NA, NA,
  FALSE, FALSE,
  NA, NA, NA, NA, NA, NA,
  "GBIF_Apidae",
  # Row 7 (GBIF_Megachilidae)
  "Hoplitis tuberculata (Nylander, 1848)", "Megachilidae", NA, "Hoplitis",
  NA, NA, "Hoplitis tuberculata", "tuberculata", NA, NA, "SPECIES",
  NA, NA, NA, NA, NA, NA, NA,
  "Paukkunen, Juho", NA, 61.06093, 24.49662, "Ta",
  NA, NA, NA, NA,
  "Hämeenlinna", NA, "FI", "FIN", "Finland", "FIN.5_1", "Western Finland",
  "CC_BY_4_0",
  "GEODETIC_DATUM_ASSUMED_WGS84;COORDINATE_PRECISION_INVALID;INSTITUTION_MATCH_FUZZY;INSTITUTION_COLLECTION_MISMATCH",
  "1968-06-12T00:00:00", NA, 12L, 6L, 1968L, "PRESERVED_SPECIMEN", NA,
  "PRESENT",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "JP09-54463", 335339819L, NA, "MZH", NA, NA, NA, 1337165L,
  NA, NA, NA,
  "Hoplitis tuberculata",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  TRUE, FALSE,
  NA, NA, NA, NA, NA, NA,
  "GBIF_Megachilidae",
  # Row 8 (GBIF_Halictidae)
  "Lasioglossum packeri", "Halictidae", NA, "Lasioglossum", NA, NA,
  "Lasioglossum packeri", "packeri", NA, NA, "SPECIES",
  NA, NA, NA, NA, NA, NA, NA,
  "Sam Droege", NA, 43.7172, -102.1011, "South Dakota",
  NA, NA, NA, NA,
  "Pennington", "Badlands National Park", "US", "USA", "United States",
  "USA.42_1", "South Dakota", "CC0_1_0",
  "GEODETIC_DATUM_ASSUMED_WGS84;INSTITUTION_MATCH_NONE",
  "2011-08-19T00:00:00", NA, 19L, 8L, 2011L, "PRESERVED_SPECIMEN", NA,
  "PRESENT", NA, "Badlands Crew", "USGS_DRO9257",
  "trapType:pan trap; trapCount:29; trapVolume:bowl 3.25oz; trapColor:in field note; trapLiquid:soap dawn",
  NA, NA, NA, NA, NA, NA,
  "SamplingEventNumber:USGS_DRO9257 | SpecimenNumber:USGS_DRO270402",
  1456689405L, NA, "BISON",
  "USGS PWRC - Native Bee Inventory and Monitoring Lab (BIML)", NA,
  "http://www.discoverlife.org/mp/20l?id=USGS_DRO270402", 10599483L, NA, NA,
  "https://bison.usgs.gov/ipt/resource?r=usgs-pwrc-biml",
  "Lasioglossum packeri",
  "StartDateTime:201108180830xx; EndDateTime:201108190830xx", NA, NA,
  "\"Sunny, upper 80's, afternoon thunderstorm, moderate wind, 10 y 10 w 10 b, 1 tipped\"",
  "FEMALE",
  NA, NA, NA, NA, NA, NA, NA, NA,
  TRUE, FALSE,
  NA, NA, NA, NA, NA, NA,
  "GBIF_Halictidae",
  # Row 9 (GBIF_Halictidae)
  "Lasioglossum hoffmanni (Strand, 1915)", "Halictidae", NA, "Lasioglossum",
  NA, NA, "Lasioglossum hoffmanni", "hoffmanni", NA, NA, "SPECIES",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "Kyushu, Fukuoka-ken, Kashii", NA, NA, NA, "JP",
  NA, NA, NA, NA,
  "CC_BY_NC_4_0", "INSTITUTION_MATCH_NONE", "1960-03-03T00:00:00", NA, 3L,
  3L, 1960L, "PRESERVED_SPECIMEN", NA, "PRESENT",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "21083", 729476432L, NA, "ELKU", NA, NA, "urn:catalog:ELKU:HYM:21083",
  1353546L,
  NA, NA, NA,
  "Lasioglossum hoffmanni",
  NA, NA, NA, NA,
  "FEMALE",
  NA, NA, NA, NA, NA, NA, NA, NA,
  FALSE, FALSE,
  NA, NA, NA, NA, NA, NA,
  "GBIF_Halictidae",
  # Row 10 (GBIF_Andrenidae)
  "Andrena nitida (Müller, 1776)", "Andrenidae", NA, "Andrena", NA, NA,
  "Andrena nitida", "nitida", NA, NA, "SPECIES",
  NA, NA, NA, NA, NA, NA, NA,
  "Straka Jakub", NA, 47.28543, 8.26479, "Ag",
  NA, NA, NA, NA, NA, NA,
  "CH", "CHE", "Switzerland", "CHE.12_1", "Lucerne", "CC_BY_4_0", NA,
  "2004-04-26T00:00:00", NA, 26L, 4L, 2004L, "HUMAN_OBSERVATION", NA,
  "PRESENT", NA, "Wermelinger Beat", NA, "Combined trap",
  NA, NA, NA, NA,
  3535L, NA, "CSCF-API-661285", NA, "CSCF-API", "WSL",
  "Swiss National Apoidea Databank", NA, "CSCF-API-661285", 1357545L,
  NA, NA, NA,
  "Andrena nitida (Müller, 1776)",
  NA, NA, NA, NA, NA, NA,
  "Info Fauna - Centre Suisse de Cartographie de la Faune", NA,
  "Obrist M.K., Wermelinger B., Moretti M., Gossner M.M., Duelli P. 2021. Hymenoptera. EnviDat. http://doi.org/10.16904/envidat.200",
  NA, NA, NA, NA,
  TRUE, FALSE,
  NA, NA, NA, NA, NA, NA,
  "GBIF_Andrenidae",
  # Row 11 (iDigBio_halictidae)
  "Lasioglossum lusoria", "Halictidae", NA, "Lasioglossum", NA, NA, NA,
  "lusoria", NA, NA, NA, "(Cresson 1872)",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "Utah", "USA", NA,
  "Dugway Proving Grounds; Wig Mt., 7.5 km N (site 35B)", NA, "Tooele",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  20L, 5L, 1998L, "PreservedSpecimen", NA, NA, NA, "T. Toler",
  NA, NA, NA, NA, NA, NA, NA, NA,
  "BBSL648975", NA, NA, "USDA-ARS", NA, NA, "658910273", NA,
  "d02c2ac6-808a-4bc3-b8bb-3983c25cb172",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "iDigBio_halictidae",
  # Row 12 (iDigBio_apidae)
  "Bombus fervidus", "Apidae", NA, "Bombus", NA, NA, NA, "fervidus", NA, NA,
  NA, "(Fabricius, 1798)",
  NA, NA, NA, NA, NA, NA,
  "R.I. Velez-Ruiz", "2012", 44.311357, -96.798388, "South Dakota",
  "United States", NA, "Brookings", NA, "Brookings",
  NA, NA, NA, NA, NA, NA, NA, NA,
  "9/9/1961", NA, 9L, 9L, 1961L, "PreservedSpecimen", NA, NA, NA,
  "H.C. Severin",
  NA, NA, NA, NA, NA, NA,
  5005L, NA, "3572", NA, NA, "SDSU", NA, NA,
  "4ccd163a-bea6-46a4-89eb-e1e842532bd9", NA,
  "513a265c-8d05-44b9-975f-7e0093599fd2",
  "urn:uuid:4ccd163a-bea6-46a4-89eb-e1e842532bd9",
  "79a96c8c-5719-4e0e-8a24-d597c48c1d62", NA, "9/9/1961",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "iDigBio_apidae",
  # Row 13 (iDigBio_apidae)
  "Bombus (Thoracobombus) pascuorum floralis (Gmelin, 1790)", "Apidae", NA,
  "Bombus", "Thoracobombus", NA, NA, "pascuorum", "floralis", NA,
  "subspecies", "(Gmelin, 1790)", NA, "Hymenoptera|Apidae",
  NA, NA, NA, NA, NA, NA, NA, NA,
  "Mecklenburg-Vorpommern, R?gen", "Germany", NA, "Sassnitz",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "1910-08-01/1910-08-31",
  NA, NA, NA, NA,
  "PreservedSpecimen",
  NA, NA, NA, NA, NA, NA, NA,
  1L,
  NA, NA, NA, NA,
  "ZMA.INS.699461",
  NA, NA, NA, NA, NA,
  "https://data.biodiversitydata.nl/naturalis/specimen/ZMA.INS.699461", NA,
  "ec08390b-e77a-461f-9ece-a9e33c63b506",
  NA, NA, NA,
  "Aug-10",
  NA, NA, NA,
  "male",
  NA, NA, NA, NA, NA, NA, NA, NA,
  NA, NA, NA, NA, NA, NA, NA, NA,
  "iDigBio_apidae",
  # Row 14 (SCAN_Apidae)
  "Bombus mixtus", "Apidae",
  NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "Cresson, 1878", "A",
  NA, NA, NA, NA, NA,
  "Jessica J. Rykken", "20/7/2016", 63.74163, -149.35381, "Alaska",
  "United States", NA, "Denali NPP., Primrose Ridge",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "20/7/2016", NA, 20L, 7L, 2016L, "PreservedSpecimen", NA, NA, NA,
  "Collector(s): Jessica J. Rykken", NA, "vane trap", NA, 1L, NA, NA, 28L,
  NA, "UAM:Ento:396686", NA, NA, "UAM", NA,
  "field number=2-T-V1_160802, U. S. National Park Service accession=DENA-00705, U. S. National Park Service catalog=DENA 49179",
  "http://arctos.database.museum/guid/UAM:Ento:396686?seid=4282896", NA, NA,
  "urn:uuid:cc7a8624-31b1-42be-8e19-27e0059e9d2a",
  "8cafaa53-4c65-4155-ae9c-13e51dabe170", NA, "7/20/2016 - 8/2/2016",
  NA, NA, NA,
  "female", "http://creativecommons.org/publicdomain/zero/1.0/",
  "University of Alaska Museum",
  NA, NA, NA,
  "https://scan-bugs.org:443/portal/collections/individual/index.php?occid=39814377",
  NA, NA, NA, NA, NA, NA,
  "39814377",
  NA, NA, NA,
  "SCAN_Apidae",
  # Row 15 (SCAN_Andrenidae)
  "Andrena crataegi", "Andrenidae", NA, "Andrena", NA, NA, NA, "crataegi",
  NA, NA, NA, "Robertson, 1893",
  NA, NA, NA, NA, NA, NA, NA,
  "2005", 48.41222, -97.41028, NA, NA, NA, "Grafton",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "7/7/1968", NA, 7L, 7L, 1968L, "PreservedSpecimen", NA, NA, NA,
  "W.E. LaBerge",
  NA, NA, NA,
  1L, NA, NA, 10000L, NA, "INHS Insect Collection 99514", NA, NA, "INHS",
  NA, NA, "aefb4875-994f-4466-b0a3-405606d996e0", NA, NA,
  "urn:uuid:9024adfa-a4dd-4cc3-8c25-3bdd0ca5486e",
  "d93d943b-2390-4e2c-9050-11a9ec9a2a96", NA, "7/7/1968",
  NA, NA, NA, NA,
  "http://creativecommons.org/licenses/by-nc/4.0/",
  NA, NA, NA, NA,
  "https://scan-bugs.org:443/portal/collections/individual/index.php?occid=60961242",
  NA, NA, NA, NA, NA, NA,
  "60961242",
  NA, NA, NA,
  "SCAN_Andrenidae",
  # Row 16 (SCAN_Megachilidae)
  "Notanthidium adornatum Urban, 1997", "Megachilidae",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "Mendoza", "ARGENTINA", NA, "El Sosneado to Bardas Blancas",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "6/12/1983", NA, 6L, 12L, 1983L, "PreservedSpecimen", NA, NA, NA,
  "L. E. Pena", NA, "Netting", NA, 1L,
  NA, NA, NA, NA,
  "AMNH_BEE 00047627", NA, NA, "AMNH", NA, NA,
  "83f79136-d8e1-11e2-99a2-0026552be7ea", NA, NA,
  "urn:uuid:b1b0bb31-846a-4575-a6fe-5440697d70e9",
  "66acc5ec-da39-42c7-811a-824c25f96161", NA, "12/6/1983",
  NA, NA, NA,
  "Female", "http://creativecommons.org/licenses/by-nc/4.0/", "AMNH",
  "Not-for-profit use only", NA, NA,
  "https://scan-bugs.org:443/portal/collections/individual/index.php?occid=49444799",
  NA, NA, NA, NA, NA, NA,
  "49444799",
  NA, NA, NA,
  "SCAN_Megachilidae",
  # Row 17 (SCAN_Halictidae)
  "Lasioglossum microlepoides", "Halictidae", NA, "Lasioglossum", NA, NA,
  NA, "microlepoides", NA, NA, NA, "(Ellis, 1914)",
  NA, NA, NA, NA, NA, NA,
  "T.L. Griswold 1999", NA, 36, -115, "Nevada", "United States", NA,
  "withheld", NA, "Clark",
  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
  "PRESERVED_SPECIMEN", NA, NA, NA,
  "M. Andres, K. Keen, K. Receveur, C. Schultz",
  NA, NA, NA, NA, NA, NA, NA, NA,
  "BBSL266213", NA, NA, "USDA-ARS",
  NA, NA, NA, NA, NA,
  "urn:uuid:da7d3cc7-381c-4a38-be46-d3a7deb4e791",
  "b870b19f-b0ea-4d3c-b3f0-53fbaf062e0c",
  NA, NA, NA, NA, NA, NA,
  "http://creativecommons.org/publicdomain/zero/1.0/",
  NA, NA, NA, NA,
  "https://scan-bugs.org:443/portal/collections/individual/index.php?occid=25412520",
  NA, NA, NA, NA, NA, NA,
  "25412520",
  NA, NA, NA,
  "SCAN_Halictidae"
)

# create an empty eml file
test_eml <- emld::template("creator")

# Run the function
Complete_data <- BeeBDC::formattedCombiner(
  path = paste0(tempdir()),
  strings = c("USGS_[a-zA-Z_]+[0-9]{4}-[0-9]{2}-[0-9]{2}"),
  # This should be the list-format with eml attached
  existingOccurrences = existingTestdb,
  existingEMLs = test_eml
)

# Test class expectations
testthat::test_that("formattedCombiner returns a list", {
  testthat::expect_type(Complete_data, "list")
})
testthat::test_that("formattedCombiner Data_WebDL is list-typed", {
  testthat::expect_type(Complete_data$Data_WebDL, "list")
})
testthat::test_that("formattedCombiner eml_files is list-typed", {
  testthat::expect_type(Complete_data$eml_files, "list")
})

# Test length of total
testthat::test_that("formattedCombiner returns two elements", {
  testthat::expect_length(Complete_data, 2)
})

# Test length of rows
testthat::test_that("formattedCombiner returns the expected 28 rows", {
  testthat::expect_equal(nrow(Complete_data$Data_WebDL), 28)
})

# Test against the sub-nrows from input datasets
testthat::test_that("formattedCombiner row count equals the sum of input rows", {
  testthat::expect_equal(
    nrow(Complete_data$Data_WebDL),
    sum(nrow(USGS_testData), nrow(existingTestdb))
  )
})