Fishery Landings

Maine DMR landings by county data

  • 2006-2020 specified by county
  • Can run into confidentiality issues when grouped by county/year/species
  • Confidential data is grouped into “Other Maine” or “Other Species” for 2006-2007
  • Maine DMR landings portal (2008-2020) says confidential landings also uses “Other Maine/County”, but data pulled from portal has “Not specified”, “UK”, “Unidentified Catch”. Unclear why or if this is different
  • 446 confidential county records ~28%
  • 31 confidential species records ~ 2%


landings<- read.csv(here("Data/MaineDMR_Modern_Landings_Data_All.csv"))

landings_sum<-select(landings, year, county, species, weight, value)%>%
              group_by(year, county, species)%>%
              summarise(weight=sum(weight), value=sum(value))

landings_06<-read.csv(here("Data/MaineDMR_2006_2007_Landings.csv"))

landings_total<-bind_rows(landings_06, landings_sum)
paged_table(landings_total)
confidential_county<-filter(landings_total, county %in% c("Other Maine", "UK", "Not-Specified"))


confidential_Species<-filter(landings_total, species %in% c("Other Species", "Unidentified Catch"))

trawl_data_update<-read.csv(here("Data/MaineDMR_Trawl_Survey_Catch_Data_2021-05-14.csv"))%>%
  rename(species=COMMON_NAME)

trawl_data_update$species<-str_to_title(trawl_data_update$species)


County

county<-group_by(landings_total, county, year)%>%
      summarise(total_weight=sum(weight, na.rm=TRUE))
paged_table(county)
#plots
p1<-ggplot(data=county, aes(x=year, y=total_weight, color=county))+
  geom_line()+ theme_classic()
p1


Species

species<-group_by(landings_total,species, year)%>%
      summarise(total_weight=sum(weight, na.rm=TRUE))
paged_table(species)


Bubble plots

#pdf(here("Objective 2/Plots/species_bubble_county.pdf"))
for(i in 1:8){

print(ggplot()+
  geom_point(data=species, aes(x=year, y=1, size=total_weight))+
  theme(axis.text.y = element_blank(), axis.title.y = element_blank(), axis.ticks.y=element_blank())+
  scale_size(range = c(.1,12), name="Weight")+
  facet_wrap_paginate(~species, ncol=2, nrow=4, page=i))
}

#dev.off()

Remove species not caught in trawl

#remove species blanks, other, unclassified
species_county_filter1<-filter(species, species != "")%>%
  filter(species != "Herring Unc")%>%
  filter(species != "Other Species")%>%
  filter(species != "Seaweed Unc")%>%
  filter(species != "Unidentified Catch")

# for(i in 1:7){
# 
# print(ggplot()+
#   geom_point(data=species_filter1, aes(x=year, y=1, size=total_weight))+
#   theme(axis.text.y = element_blank(), axis.title.y = element_blank(), axis.ticks.y=element_blank())+
#   scale_size(range = c(.1,12), name="Weight")+
#   facet_wrap_paginate(~species, ncol=2, nrow=4, page=i))
# }

remove_county<-anti_join(species_county_filter1, trawl_data_update)
remove_county<-as.data.frame(unique(remove_county$species))
remove_county<-rename(remove_county, species="unique(remove_county$species)")
remove_county
##                           species
## 1                      Bloodworms
## 2                       Clam Soft
## 3                 Clam Surf / Hen
## 4                        Crab Unc
## 5                           Elver
## 6                         Hagfish
## 7       Oyster Eastern / American
## 8               Periwinkle Common
## 9  Periwinkles Atlantic (Cockles)
## 10                       Rockweed
## 11                      Sandworms
## 12                     Sea Urchin
## 13                         Skates
## 14                         Smelts
## 15                      Swordfish
## 16          Tuna Atlantic Bluefin
## 17                    Tuna Bigeye
## 18                     Yellow Eel
species_county_filter2<-anti_join(species_county_filter1, remove_county)


for(i in 1:5){

print(ggplot()+
  geom_point(data=species_county_filter2, aes(x=year, y=1, size=total_weight))+
  theme(axis.text.y = element_blank(), axis.title.y = element_blank(), axis.ticks.y=element_blank())+
  scale_size(range = c(.1,12), name="Weight")+
  facet_wrap_paginate(~species, ncol=2, nrow=4, page=i))
}


Top 10 species

Maine DMR landings historic data

  • 1950-2019 not grouped by county
  • Yearly totals for select species included
  • Are trends different when county confidentiality is not an issue?


Species

species_full<-group_by(landings_full,species, year)%>%
      summarise(total_weight=sum(total_weight, na.rm=TRUE))%>%
      filter(year>2005)
paged_table(species_full)
unique(species_full$species)
##  [1] "Alewife"                             "Bloodworms"                         
##  [3] "Clam Northern Quahog / Hard"         "Clam Ocean Quahog"                  
##  [5] "Clam Soft"                           "Cod Atlantic"                       
##  [7] "Crab Unc"                            "Cucumber Sea"                       
##  [9] "Cusk"                                "Elver"                              
## [11] "Flounder Atlantic Witch (Gray Sole)" "Flounder Winter"                    
## [13] "Flounder Yellowtail"                 "Haddock"                            
## [15] "Hake Silver (Whiting)"               "Hake White"                         
## [17] "Halibut Atlantic"                    "Herring Atlantic"                   
## [19] "Lobster American"                    "Mackerel Atlantic"                  
## [21] "Menhaden Atlantic"                   "Monkfish"                           
## [23] "Mussel Blue Sea"                     "Oysters"                            
## [25] "Periwinkles Atlantic (Cockles)"      "Plaice American (Dab)"              
## [27] "Pollock"                             "Redfish Acadian Ocean Perch"        
## [29] "Rockweed"                            "Salmon Atlantic"                    
## [31] "Sandworms"                           "Scallop Sea"                        
## [33] "Sea Urchins Green"                   "Seaweed Unc"                        
## [35] "Shark Dogfish"                       "Shrimp Northern"                    
## [37] "Skates"                              "Wolffish Atlantic"                  
## [39] "Yellow Eel"


Bubble plots

#pdf(here("Objective 2/Plots/species_bubble_full.pdf"))
for(i in 1:5){

print(ggplot()+
  geom_point(data=species_full, aes(x=year, y=1, size=total_weight))+
  theme(axis.text.y = element_blank(), axis.title.y = element_blank(), axis.ticks.y=element_blank())+
  scale_size(range = c(.1,12), name="Weight")+
  facet_wrap_paginate(~species, ncol=2, nrow=4, page=i))
}

#dev.off()

Remove species not caught in trawl

#remove species blanks, other, unclassified
species_full_filter1<-species_full%>%
  filter(species != "Skates")%>%
  filter(species != "Crab Unc")%>%
  filter(species != "Seaweed Unc")

remove_full<-anti_join(species_full_filter1, trawl_data_update)
remove_full<-as.data.frame(unique(remove_full$species))
remove_full<-rename(remove_full, species="unique(remove_full$species)")
remove_full
##                           species
## 1                      Bloodworms
## 2                       Clam Soft
## 3                           Elver
## 4                         Oysters
## 5  Periwinkles Atlantic (Cockles)
## 6                        Rockweed
## 7                 Salmon Atlantic
## 8                       Sandworms
## 9                   Shark Dogfish
## 10                     Yellow Eel
species_full_filter2<-anti_join(species_full_filter1, remove_full)


for(i in 1:4){

print(ggplot()+
  geom_point(data=species_full_filter2, aes(x=year, y=1, size=total_weight))+
  theme(axis.text.y = element_blank(), axis.title.y = element_blank(), axis.ticks.y=element_blank())+
  scale_size(range = c(.1,12), name="Weight")+
  facet_wrap_paginate(~species, ncol=2, nrow=4, page=i))
}


Top 10 species

order_full<-group_by(species_full, species)%>%
  summarise(avg_weight=mean(total_weight, na.rm=TRUE))%>%
  arrange(desc(avg_weight))

top_10_full<-order_full[1:10,1]
species_top<-inner_join(species_full, top_10)
  
p2<-ggplot(data=species_top, aes(x=year, y=total_weight, color=species))+
  geom_line()+ theme_classic()
p2


Diveristy metrics

  • options for…
    • #1 all landings or
    • #2 species filtered for best comparison
  • ME-NH Inshore Trawl Survey and ME landings biodiversity indices are not a direct comparison as the first is calculated on a tow basis and the other an annual basis.

Species richness

County specified data

richness<-group_by(landings_total, county, year)%>%
  summarise(richness=length(unique(species)))

ggplot()+ geom_line(data=richness, aes(year, richness))+
  facet_grid(rows=vars(county))+theme_bw()

richness<-group_by(landings_total, year)%>%
  summarise(richness=length(unique(species)))

rich<-ggplot()+ geom_line(data=subset(richness, year<2020), aes(year, richness))+ theme_classic()+ ylab(expression(paste("Species\nRichness")))+theme(plot.margin = margin(10, 10, 10, 20))

#filtered for best comparison to trawl survey
richness2<-group_by(species_county_filter2, year)%>%
  summarise(richness=length(unique(species)))

rich2<-ggplot()+ geom_line(data=subset(richness2, year<2020), aes(year, richness))+ theme_classic()+ ylab(expression(paste("Species\nRichness")))+theme(plot.margin = margin(10, 10, 10, 20))

rich2

Full data not county specified

richness_full<-group_by(landings_full, year)%>%
  summarise(richness=length(unique(species)))


rich_full<-ggplot()+ geom_line(data=subset(richness_full, year>2005), aes(year, richness))+ theme_classic()+ ylab(expression(paste("Species\nRichness")))+theme(plot.margin = margin(10, 10, 10, 20))

rich_full

# filtered
richness_full2<-group_by(species_full_filter2, year)%>%
  summarise(richness=length(unique(species)))

rich_full2<-ggplot()+ geom_line(data=subset(richness_full2, year>2005), aes(year, richness))+ theme_classic()+ ylab(expression(paste("Species\nRichness")))+theme(plot.margin = margin(10, 10, 10, 20))

rich_full2

Shannon-Weiner diversity

County specified data

shannon<-group_by(landings_total, year,county)%>%
  mutate(total=sum(weight), prop=weight/total)%>%
  summarise(shannon=-1*(sum(prop*log(prop))))

ggplot()+geom_line(data=shannon, aes(year, shannon))+
  facet_grid(rows=vars(county))+theme_bw()

shannon<-group_by(landings_total, year,species)%>%
  mutate(species_total=sum(weight))%>%
  group_by(year)%>%
  mutate(total=sum(species_total),prop=species_total/total)%>%
  summarise(shannon=-1*(sum(prop*log(prop))))

diversity<-ggplot()+geom_line(data=subset(shannon, year<2020), aes(year, shannon))+theme_classic()+ ylab(expression(paste("Shannon-Weiner\nDiversity")))+theme(plot.margin = margin(10, 10, 10, 20))

diversity

#filtered
shannon2<-group_by(species_county_filter2, year,species)%>%
  mutate(species_total=sum(total_weight))%>%
  group_by(year)%>%
  mutate(total=sum(species_total),prop=species_total/total)%>%
  summarise(shannon=-1*(sum(prop*log(prop))))

diversity2<-ggplot()+geom_line(data=subset(shannon2, year<2020), aes(year, shannon))+theme_classic()+ ylab(expression(paste("Shannon-Weiner\nDiversity")))+theme(plot.margin = margin(10, 10, 10, 20))

diversity2

Full data

shannon_full<-group_by(landings_full, year,species)%>%
  mutate(species_total=sum(total_weight))%>%
  group_by(year)%>%
  mutate(total=sum(species_total),prop=species_total/total)%>%
  summarise(shannon=-1*(sum(prop*log(prop))))

diversity_full<-ggplot()+geom_line(data=subset(shannon_full, year>2005), aes(year, shannon))+theme_classic()+ ylab(expression(paste("Shannon-Weiner\nDiversity")))+theme(plot.margin = margin(10, 10, 10, 20))

diversity_full

#filtered data
shannon_full2<-group_by(species_full_filter2, year,species)%>%
  mutate(species_total=sum(total_weight))%>%
  group_by(year)%>%
  mutate(total=sum(species_total),prop=species_total/total)%>%
  summarise(shannon=-1*(sum(prop*log(prop))))

diversity_full2<-ggplot()+geom_line(data=subset(shannon_full2, year>2005), aes(year, shannon))+theme_classic()+ ylab(expression(paste("Shannon-Weiner\nDiversity")))+theme(plot.margin = margin(10, 10, 10, 20))

diversity_full2

Simpson’s diversity and evenness

County specified data

simpson<-group_by(landings_total, year, county)%>%
  mutate(total=sum(weight), prop=weight/total)%>%
  summarise(simpson=1/(sum(prop^2)) )

ggplot()+geom_line(data=simpson, aes(year, simpson))+
  facet_grid(rows=vars(county))+theme_bw()

simpson<-group_by(landings_total, year,species)%>%
  summarise(species_total=sum(weight))%>% #aggregate counties to get yearly species totals for all of Maine
  group_by(year)%>%
  mutate(richness=length(unique(species)))%>%
  mutate(total=sum(species_total))%>%
  mutate(prop=species_total/total)%>%
  summarise(simpsonD=1/(sum(prop^2)),simpsonE=simpsonD*(1/richness))
   
#ggplot()+geom_line(data=simpson, aes(year, simpsonD))+theme_classic()

evenness<-ggplot()+geom_line(data=subset(simpson, year<2020), aes(year, simpsonE))+theme_classic()+ ylab(expression(paste("Simpson's\nEvenness")))+theme(plot.margin = margin(10, 10, 10, 20))

evenness

#filtered data
simpson2<-group_by(species_county_filter2, year,species)%>%
  summarise(species_total=sum(total_weight))%>% #aggregate counties to get yearly species totals for all of Maine
  group_by(year)%>%
  mutate(richness=length(unique(species)))%>%
  mutate(total=sum(species_total))%>%
  mutate(prop=species_total/total)%>%
  summarise(simpsonD=1/(sum(prop^2)),simpsonE=simpsonD*(1/richness))
   
#ggplot()+geom_line(data=simpson2, aes(year, simpsonD))+theme_classic()

evenness2<-ggplot()+geom_line(data=subset(simpson2, year<2020), aes(year, simpsonE))+theme_classic()+ ylab(expression(paste("Simpson's\nEvenness")))+theme(plot.margin = margin(10, 10, 10, 20))

evenness2

Full data

simpson_full<-group_by(landings_full, year,species)%>%
  summarise(species_total=sum(total_weight))%>% #aggregate counties to get yearly species totals for all of Maine
  group_by(year)%>%
  mutate(richness=length(unique(species)))%>%
  mutate(total=sum(species_total))%>%
  mutate(prop=species_total/total)%>%
  summarise(simpsonD=1/(sum(prop^2)),simpsonE=simpsonD*(1/richness))
   
#ggplot()+geom_line(data=simpson_full, aes(year, simpsonD))+theme_classic()

evenness_full<-ggplot()+geom_line(data=subset(simpson_full, year>2005), aes(year, simpsonE))+theme_classic()+ ylab(expression(paste("Simpson's\nEvenness")))+theme(plot.margin = margin(10, 10, 10, 20))

evenness_full

#filtered data
simpson_full2<-group_by(species_full_filter2, year,species)%>%
  summarise(species_total=sum(total_weight))%>% #aggregate counties to get yearly species totals for all of Maine
  group_by(year)%>%
  mutate(richness=length(unique(species)))%>%
  mutate(total=sum(species_total))%>%
  mutate(prop=species_total/total)%>%
  summarise(simpsonD=1/(sum(prop^2)),simpsonE=simpsonD*(1/richness))
   
#ggplot()+geom_line(data=simpson_full2, aes(year, simpsonD))+theme_classic()

evenness_full2<-ggplot()+geom_line(data=subset(simpson_full2, year>2005), aes(year, simpsonE))+theme_classic()+ ylab(expression(paste("Simpson's\nEvenness")))+theme(plot.margin = margin(10, 10, 10, 20))

evenness_full2

Average taxinomic distinctness

Country specified data

library(taxize)
library(purrr)
library(mgcv)

species<-read.csv(here("Data/species_landings.csv"))%>%
  filter(scientific.name!="")%>%
  filter(scientific.name!="Fucus vesiculosus")%>%
  rename(common_name=species, Species=scientific.name)
  

diff_species<-as.vector(unique(species$Species))
tax <- classification(diff_species, db = 'itis') 
## ==  42 queries  ==============
## v  Found:  Gadus morhua
## v  Found:  Cancer irroratus
## v  Found:  Cancer borealis
## v  Found:  Brosme brosme
## v  Found:  Anguilla rostrata
## v  Found:  Glyptocephalus cynoglossus
## v  Found:  Pseudopleuronectes americanus
## v  Found:  Melanogrammus aeglefinus
## v  Found:  Urophycis tenuis
## v  Found:  Hippoglossus hippoglossus
## v  Found:  Clupea harengus
## v  Found:  Homarus americanus
## v  Found:  Lophius americanus
## v  Found:  Hippoglossoides platessoides
## v  Found:  Pollachius virens
## v  Found:  Sebastes fasciatus
## v  Found:  Placopecten magellanicus
## v  Found:  Pandalus borealis
## v  Found:  Anarhichas lupus
## v  Found:  Mytilus edulis
## v  Found:  Strongylocentrotus droebachiensis
## v  Found:  Crassostrea virginica
## v  Found:  Mercenaria mercenaria
## v  Found:  Limanda ferruginea
## v  Found:  Merluccius bilinearis
## v  Found:  Scomber scombrus
## v  Found:  Cucumaria frondosa
## v  Found:  Pomatomus saltatrix
## v  Found:  Brevoortia tyrannus
## v  Found:  Arctica islandica
## v  Found:  Ensis directus
## v  Found:  Squalus acanthias
## v  Found:  Myxine glutinosa
## v  Found:  Ostrea edulis
## v  Found:  Littorina littorea
## v  Found:  Loligo pealeii
## v  Found:  Thunnus thynnus
## v  Found:  Xiphias gladius
## v  Found:  Buccinum undatum
## v  Found:  Illex illecebrosus
## v  Found:  Thunnus obesus
## v  Found:  Spisula solidissima
## ==  Results  =================
## 
## * Total: 42 
## * Found: 42 
## * Not Found: 0
info <- matrix(NA)
expand <- matrix(NA)
specific <- matrix(NA, nrow = length(diff_species), ncol=6)


for (i in 1:length(tax)){
info <- tax[[i]][c('name','rank')]
expand <- info[info$rank == 'phylum'| info$rank == 'class'| info$rank == 'order' | info$rank == 'family' | info$rank == 'genus' | info$rank == 'species',]
specific[i,] <- as.vector(expand$name)

}
colnames(specific) <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")
phylo<-as.data.frame(specific)

merge<-left_join(species,phylo, by="Species")
  
landings_tax<-rename(landings_total, common_name=species)%>%
  full_join(merge)%>%
  filter(!is.na(Species))

landings_tax_groups<-group_by(landings_tax, year,Species)%>%
  summarise(weight=sum(weight))%>%
  mutate(indicator=cur_group_id())%>%
  left_join(phylo)

hauls <- unique(landings_tax_groups$indicator)   
N_hauls <- length(hauls) # number of hauls
N_species <- NULL #N species
sub_species <- NULL # N_species-1
total <- NULL  #xixj
numerator <- NULL #wijxixj
x_y <- matrix(NA, nrow = 6, ncol = 6) 
x <- NULL
y <- NULL
ident <- NULL
weight <- NULL #wij
count <- NULL
total_weight <- NULL
mean_weight <- NULL
weight_var <- NULL
delta <- NULL
delta_star <- NULL
delta_plus <- NULL
delta_var <- NULL
weight_var <- NULL

for (j in 1:N_hauls) {  
diff_hauls <- landings_tax_groups[which(landings_tax_groups$indicator == j),] #subset unique hauls/functional groups
N_species[j] <- length(unique(diff_hauls$Species))# count the number of unique species in each haul (denominator)
sub_species[j] <- N_species[j]-1
diff <- unique(as.vector(diff_hauls$Species)) # name of each unique species 
combos <- combn(diff, 2) # create combinations of each species/haul (for weight calc)

phylo <- as.matrix(subset(diff_hauls, select = c(Phylum,Class,Order,Family,Genus, Species))) # extract phylogenetic information only
unique_phylo <- uniquecombs(phylo) # subset by unique species information
unique_phylo <- as.data.frame(unique_phylo)

total <- NULL  # reset the length for each haul because they will be different
weight <- NULL # reset  

for (i in 1:ncol(combos)) { # for each unique combination count the number of each species 
  total[i] <- sum(diff_hauls$Species == combos[1,i]) * sum(diff_hauls$Species == combos[2,i]) #empty vector is always length 210
  #total[i] <- diff_hauls[diff_hauls[,22] == combos[1,i],9] * diff_hauls[diff_hauls[,22] == combos[2,i],9]
  x <- unique_phylo[unique_phylo$Species == combos[1,i],]
  y <- unique_phylo[unique_phylo$Species == combos[2,i],]
  x_y <- rbind(x,y)
  
  for (k in 1:ncol(x_y)){ # for each combination calculate the weight value 
    ident[k] <- identical(as.vector(x_y[1,k]), as.vector(x_y[2,k])) # determine how much of phylogenetic information is the same
    weight[i] <- sum(ident == "FALSE") # vector of weights
    #mean_weight[i] <- mean(weight) #rep(mean(weight),length(weight))
    numerator[j] <- sum(total*weight) 
    count[j] <- sum(total)
    mean_weight[j] <- mean(weight)
    total_weight[j] <- sum(weight)
    weight_var[j] <- sum((weight- mean(weight))^2) 
  }
  delta <- (2*numerator)/(N_species*sub_species)
  delta_star <- numerator/(count)
  delta_plus <- (2*total_weight)/(N_species*sub_species)
  delta_var <- (2*weight_var)/(N_species*sub_species) #double check that this equation is correct
}
}
years<-2006:2020
delta<-as.data.frame(delta[1:15]) # taxonomic diversity
delta_star<-as.data.frame(delta_star[1:15]) # taxonomic distinctness
delta_plus<-as.data.frame(delta_plus[1:15]) # average taxonomic distinctness
delta_var<-as.data.frame(delta_var[1:15]) # variation in taxonomic distinctness

d<-bind_cols(years,delta, delta_star,delta_plus,delta_var)
colnames(d)<-c("year", "delta", "delta_star", "delta_plus","delta_var")

#write.csv(d, here("Data/tax_metrics.csv"))

tax.distinct<-ggplot()+geom_line(data=subset(d, year<2020), aes(year, delta_plus))+theme_classic()+ylab(expression(paste("Average\nTaxinomic\nDistinctness")))+theme(plot.margin = margin(10, 10, 10, 25))
                                                                        tax.distinct    

With filtered data

species<-read.csv(here("Data/species_landings.csv"))%>%
  filter(scientific.name!="")%>%
  filter(scientific.name!="Fucus vesiculosus")%>%
  anti_join(remove_county)%>%
  rename(common_name=species, Species=scientific.name)
  

diff_species<-as.vector(unique(species$Species))
tax <- classification(diff_species, db = 'itis') 
## ==  35 queries  ==============
## v  Found:  Gadus morhua
## v  Found:  Cancer irroratus
## v  Found:  Cancer borealis
## v  Found:  Brosme brosme
## v  Found:  Anguilla rostrata
## v  Found:  Glyptocephalus cynoglossus
## v  Found:  Pseudopleuronectes americanus
## v  Found:  Melanogrammus aeglefinus
## v  Found:  Urophycis tenuis
## v  Found:  Hippoglossus hippoglossus
## v  Found:  Clupea harengus
## v  Found:  Homarus americanus
## v  Found:  Lophius americanus
## v  Found:  Hippoglossoides platessoides
## v  Found:  Pollachius virens
## v  Found:  Sebastes fasciatus
## v  Found:  Placopecten magellanicus
## v  Found:  Pandalus borealis
## v  Found:  Anarhichas lupus
## v  Found:  Mytilus edulis
## v  Found:  Strongylocentrotus droebachiensis
## v  Found:  Mercenaria mercenaria
## v  Found:  Limanda ferruginea
## v  Found:  Merluccius bilinearis
## v  Found:  Scomber scombrus
## v  Found:  Cucumaria frondosa
## v  Found:  Pomatomus saltatrix
## v  Found:  Brevoortia tyrannus
## v  Found:  Arctica islandica
## v  Found:  Ensis directus
## v  Found:  Squalus acanthias
## v  Found:  Ostrea edulis
## v  Found:  Loligo pealeii
## v  Found:  Buccinum undatum
## v  Found:  Illex illecebrosus
## ==  Results  =================
## 
## * Total: 35 
## * Found: 35 
## * Not Found: 0
info <- matrix(NA)
expand <- matrix(NA)
specific <- matrix(NA, nrow = length(diff_species), ncol=6)


for (i in 1:length(tax)){
info <- tax[[i]][c('name','rank')]
expand <- info[info$rank == 'phylum'| info$rank == 'class'| info$rank == 'order' | info$rank == 'family' | info$rank == 'genus' | info$rank == 'species',]
specific[i,] <- as.vector(expand$name)

}
colnames(specific) <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")
phylo<-as.data.frame(specific)

merge<-left_join(species,phylo, by="Species")
  
landings_tax<-rename(landings_total, common_name=species)%>%
  full_join(merge)%>%
  filter(!is.na(Species))

landings_tax_groups<-group_by(landings_tax, year,Species)%>%
  summarise(weight=sum(weight))%>%
  mutate(indicator=cur_group_id())%>%
  left_join(phylo)

hauls <- unique(landings_tax_groups$indicator)   
N_hauls <- length(hauls) # number of hauls
N_species <- NULL #N species
sub_species <- NULL # N_species-1
total <- NULL  #xixj
numerator <- NULL #wijxixj
x_y <- matrix(NA, nrow = 6, ncol = 6) 
x <- NULL
y <- NULL
ident <- NULL
weight <- NULL #wij
count <- NULL
total_weight <- NULL
mean_weight <- NULL
weight_var <- NULL
delta <- NULL
delta_star <- NULL
delta_plus <- NULL
delta_var <- NULL
weight_var <- NULL

for (j in 1:N_hauls) {  
diff_hauls <- landings_tax_groups[which(landings_tax_groups$indicator == j),] #subset unique hauls/functional groups
N_species[j] <- length(unique(diff_hauls$Species))# count the number of unique species in each haul (denominator)
sub_species[j] <- N_species[j]-1
diff <- unique(as.vector(diff_hauls$Species)) # name of each unique species 
combos <- combn(diff, 2) # create combinations of each species/haul (for weight calc)

phylo <- as.matrix(subset(diff_hauls, select = c(Phylum,Class,Order,Family,Genus, Species))) # extract phylogenetic information only
unique_phylo <- uniquecombs(phylo) # subset by unique species information
unique_phylo <- as.data.frame(unique_phylo)

total <- NULL  # reset the length for each haul because they will be different
weight <- NULL # reset  

for (i in 1:ncol(combos)) { # for each unique combination count the number of each species 
  total[i] <- sum(diff_hauls$Species == combos[1,i]) * sum(diff_hauls$Species == combos[2,i]) #empty vector is always length 210
  #total[i] <- diff_hauls[diff_hauls[,22] == combos[1,i],9] * diff_hauls[diff_hauls[,22] == combos[2,i],9]
  x <- unique_phylo[unique_phylo$Species == combos[1,i],]
  y <- unique_phylo[unique_phylo$Species == combos[2,i],]
  x_y <- rbind(x,y)
  
  for (k in 1:ncol(x_y)){ # for each combination calculate the weight value 
    ident[k] <- identical(as.vector(x_y[1,k]), as.vector(x_y[2,k])) # determine how much of phylogenetic information is the same
    weight[i] <- sum(ident == "FALSE") # vector of weights
    #mean_weight[i] <- mean(weight) #rep(mean(weight),length(weight))
    numerator[j] <- sum(total*weight) 
    count[j] <- sum(total)
    mean_weight[j] <- mean(weight)
    total_weight[j] <- sum(weight)
    weight_var[j] <- sum((weight- mean(weight))^2) 
  }
  delta <- (2*numerator)/(N_species*sub_species)
  delta_star <- numerator/(count)
  delta_plus <- (2*total_weight)/(N_species*sub_species)
  delta_var <- (2*weight_var)/(N_species*sub_species) #double check that this equation is correct
}
}
years<-2006:2020
delta<-as.data.frame(delta[1:15]) # taxonomic diversity
delta_star<-as.data.frame(delta_star[1:15]) # taxonomic distinctness
delta_plus<-as.data.frame(delta_plus[1:15]) # average taxonomic distinctness
delta_var<-as.data.frame(delta_var[1:15]) # variation in taxonomic distinctness

d2<-bind_cols(years,delta, delta_star,delta_plus,delta_var)
colnames(d2)<-c("year", "delta", "delta_star", "delta_plus","delta_var")

#write.csv(d, here("Data/tax_metrics.csv"))

tax.distinct2<-ggplot()+geom_line(data=subset(d2, year<2020), aes(year, delta_plus))+theme_classic()+ylab(expression(paste("Average\nTaxinomic\nDistinctness")))+theme(plot.margin = margin(10, 10, 10, 25))
                                                                        tax.distinct2    

Full data

unique<-as.data.frame(unique(species_full$species))%>%
  rename(common_name="unique(species_full$species)")

species2<-read.csv(here("Data/species_landings.csv"))%>%
  filter(scientific.name!="Fucus vesiculosus")%>%
  anti_join(remove_full)%>%
  rename(common_name=species, Species=scientific.name)%>%
  right_join(unique)

species2$Species[species2$common_name=="Alewife"]<-"Alosa pseudoharengus" 
species2$Species[species2$common_name=="Salmon Atlantic"]<-"Salmo salar"

species<-species2%>%
  filter(Species!="NA")%>%
  filter(Species!="")

diff_species<-as.vector(unique(species$Species))
tax <- classification(diff_species, db = 'itis') 
## ==  27 queries  ==============
## v  Found:  Gadus morhua
## v  Found:  Brosme brosme
## v  Found:  Glyptocephalus cynoglossus
## v  Found:  Pseudopleuronectes americanus
## v  Found:  Melanogrammus aeglefinus
## v  Found:  Urophycis tenuis
## v  Found:  Hippoglossus hippoglossus
## v  Found:  Clupea harengus
## v  Found:  Homarus americanus
## v  Found:  Lophius americanus
## v  Found:  Hippoglossoides platessoides
## v  Found:  Pollachius virens
## v  Found:  Sebastes fasciatus
## v  Found:  Placopecten magellanicus
## v  Found:  Pandalus borealis
## v  Found:  Anarhichas lupus
## v  Found:  Mytilus edulis
## v  Found:  Strongylocentrotus droebachiensis
## v  Found:  Mercenaria mercenaria
## v  Found:  Limanda ferruginea
## v  Found:  Merluccius bilinearis
## v  Found:  Scomber scombrus
## v  Found:  Cucumaria frondosa
## v  Found:  Brevoortia tyrannus
## v  Found:  Arctica islandica
## v  Found:  Alosa pseudoharengus
## v  Found:  Salmo salar
## ==  Results  =================
## 
## * Total: 27 
## * Found: 27 
## * Not Found: 0
info <- matrix(NA)
expand <- matrix(NA)
specific <- matrix(NA, nrow = length(diff_species), ncol=6)


for (i in 1:length(tax)){
info <- tax[[i]][c('name','rank')]
expand <- info[info$rank == 'phylum'| info$rank == 'class'| info$rank == 'order' | info$rank == 'family' | info$rank == 'genus' | info$rank == 'species',]
specific[i,] <- as.vector(expand$name)

}
colnames(specific) <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")
phylo<-as.data.frame(specific)

merge<-left_join(species,phylo, by="Species")
  
landings_tax<-rename(landings_full, common_name=species)%>%
  full_join(merge)%>%
  filter(!is.na(Species))

landings_tax_groups<-group_by(landings_tax, year,Species)%>%
  summarise(weight=sum(total_weight))%>%
  mutate(indicator=cur_group_id())%>%
  left_join(phylo)

hauls <- unique(landings_tax_groups$indicator)   
N_hauls <- length(hauls) # number of hauls
N_species <- NULL #N species
sub_species <- NULL # N_species-1
total <- NULL  #xixj
numerator <- NULL #wijxixj
x_y <- matrix(NA, nrow = 6, ncol = 6) 
x <- NULL
y <- NULL
ident <- NULL
weight <- NULL #wij
count <- NULL
total_weight <- NULL
mean_weight <- NULL
weight_var <- NULL
delta <- NULL
delta_star <- NULL
delta_plus <- NULL
delta_var <- NULL
weight_var <- NULL

for (j in 1:N_hauls) {  
diff_hauls <- landings_tax_groups[which(landings_tax_groups$indicator == j),] #subset unique hauls/functional groups
N_species[j] <- length(unique(diff_hauls$Species))# count the number of unique species in each haul (denominator)
sub_species[j] <- N_species[j]-1
diff <- unique(as.vector(diff_hauls$Species)) # name of each unique species 
combos <- combn(diff, 2) # create combinations of each species/haul (for weight calc)

phylo <- as.matrix(subset(diff_hauls, select = c(Phylum,Class,Order,Family,Genus, Species))) # extract phylogenetic information only
unique_phylo <- uniquecombs(phylo) # subset by unique species information
unique_phylo <- as.data.frame(unique_phylo)

total <- NULL  # reset the length for each haul because they will be different
weight <- NULL # reset  

for (i in 1:ncol(combos)) { # for each unique combination count the number of each species 
  total[i] <- sum(diff_hauls$Species == combos[1,i]) * sum(diff_hauls$Species == combos[2,i]) #empty vector is always length 210
  #total[i] <- diff_hauls[diff_hauls[,22] == combos[1,i],9] * diff_hauls[diff_hauls[,22] == combos[2,i],9]
  x <- unique_phylo[unique_phylo$Species == combos[1,i],]
  y <- unique_phylo[unique_phylo$Species == combos[2,i],]
  x_y <- rbind(x,y)
  
  for (k in 1:ncol(x_y)){ # for each combination calculate the weight value 
    ident[k] <- identical(as.vector(x_y[1,k]), as.vector(x_y[2,k])) # determine how much of phylogenetic information is the same
    weight[i] <- sum(ident == "FALSE") # vector of weights
    #mean_weight[i] <- mean(weight) #rep(mean(weight),length(weight))
    numerator[j] <- sum(total*weight) 
    count[j] <- sum(total)
    mean_weight[j] <- mean(weight)
    total_weight[j] <- sum(weight)
    weight_var[j] <- sum((weight- mean(weight))^2) 
  }
  delta <- (2*numerator)/(N_species*sub_species)
  delta_star <- numerator/(count)
  delta_plus <- (2*total_weight)/(N_species*sub_species)
  delta_var <- (2*weight_var)/(N_species*sub_species) #double check that this equation is correct
}
}
years<-1950:2019
delta<-as.data.frame(delta[1:70]) # taxonomic diversity
delta_star<-as.data.frame(delta_star[1:70]) # taxonomic distinctness
delta_plus<-as.data.frame(delta_plus[1:70]) # average taxonomic distinctness
delta_var<-as.data.frame(delta_var[1:70]) # variation in taxonomic distinctness

d_full<-bind_cols(years,delta, delta_star,delta_plus,delta_var)
colnames(d_full)<-c("year", "delta", "delta_star", "delta_plus","delta_var")

#write.csv(d_full, here("Data/tax_metrics_full.csv"))

tax.distinct_full<-ggplot()+geom_line(data=subset(d_full, year>2005), aes(year, delta_plus))+theme_classic()+ylab(expression(paste("Average\nTaxinomic\nDistinctness")))+theme(plot.margin = margin(10, 10, 10, 25))
                                                                      tax.distinct_full    

filtered data

library(taxize)
library(purrr)
library(mgcv)

unique<-as.data.frame(unique(species_full_filter2$species))%>%
  rename(common_name="unique(species_full_filter2$species)")

species2<-read.csv(here("Data/species_landings.csv"))%>%
  filter(scientific.name!="Fucus vesiculosus")%>%
  rename(common_name=species, Species=scientific.name)%>%
  right_join(unique)

species2$Species[species2$common_name=="Alewife"]<-"Alosa pseudoharengus" 

species<-species2%>%
  filter(Species!="NA")%>%
  filter(Species!="")

diff_species<-as.vector(unique(species$Species))
tax <- classification(diff_species, db = 'itis') 
## ==  26 queries  ==============
## v  Found:  Gadus morhua
## v  Found:  Brosme brosme
## v  Found:  Glyptocephalus cynoglossus
## v  Found:  Pseudopleuronectes americanus
## v  Found:  Melanogrammus aeglefinus
## v  Found:  Urophycis tenuis
## v  Found:  Hippoglossus hippoglossus
## v  Found:  Clupea harengus
## v  Found:  Homarus americanus
## v  Found:  Lophius americanus
## v  Found:  Hippoglossoides platessoides
## v  Found:  Pollachius virens
## v  Found:  Sebastes fasciatus
## v  Found:  Placopecten magellanicus
## v  Found:  Pandalus borealis
## v  Found:  Anarhichas lupus
## v  Found:  Mytilus edulis
## v  Found:  Strongylocentrotus droebachiensis
## v  Found:  Mercenaria mercenaria
## v  Found:  Limanda ferruginea
## v  Found:  Merluccius bilinearis
## v  Found:  Scomber scombrus
## v  Found:  Cucumaria frondosa
## v  Found:  Brevoortia tyrannus
## v  Found:  Arctica islandica
## v  Found:  Alosa pseudoharengus
## ==  Results  =================
## 
## * Total: 26 
## * Found: 26 
## * Not Found: 0
info <- matrix(NA)
expand <- matrix(NA)
specific <- matrix(NA, nrow = length(diff_species), ncol=6)


for (i in 1:length(tax)){
info <- tax[[i]][c('name','rank')]
expand <- info[info$rank == 'phylum'| info$rank == 'class'| info$rank == 'order' | info$rank == 'family' | info$rank == 'genus' | info$rank == 'species',]
specific[i,] <- as.vector(expand$name)

}
colnames(specific) <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")
phylo<-as.data.frame(specific)

merge<-left_join(species,phylo, by="Species")
  
landings_tax<-rename(landings_full, common_name=species)%>%
  full_join(merge)%>%
  filter(!is.na(Species))

landings_tax_groups<-group_by(landings_tax, year,Species)%>%
  summarise(weight=sum(total_weight))%>%
  mutate(indicator=cur_group_id())%>%
  left_join(phylo)

hauls <- unique(landings_tax_groups$indicator)   
N_hauls <- length(hauls) # number of hauls
N_species <- NULL #N species
sub_species <- NULL # N_species-1
total <- NULL  #xixj
numerator <- NULL #wijxixj
x_y <- matrix(NA, nrow = 6, ncol = 6) 
x <- NULL
y <- NULL
ident <- NULL
weight <- NULL #wij
count <- NULL
total_weight <- NULL
mean_weight <- NULL
weight_var <- NULL
delta <- NULL
delta_star <- NULL
delta_plus <- NULL
delta_var <- NULL
weight_var <- NULL

for (j in 1:N_hauls) {  
diff_hauls <- landings_tax_groups[which(landings_tax_groups$indicator == j),] #subset unique hauls/functional groups
N_species[j] <- length(unique(diff_hauls$Species))# count the number of unique species in each haul (denominator)
sub_species[j] <- N_species[j]-1
diff <- unique(as.vector(diff_hauls$Species)) # name of each unique species 
combos <- combn(diff, 2) # create combinations of each species/haul (for weight calc)

phylo <- as.matrix(subset(diff_hauls, select = c(Phylum,Class,Order,Family,Genus, Species))) # extract phylogenetic information only
unique_phylo <- uniquecombs(phylo) # subset by unique species information
unique_phylo <- as.data.frame(unique_phylo)

total <- NULL  # reset the length for each haul because they will be different
weight <- NULL # reset  

for (i in 1:ncol(combos)) { # for each unique combination count the number of each species 
  total[i] <- sum(diff_hauls$Species == combos[1,i]) * sum(diff_hauls$Species == combos[2,i]) #empty vector is always length 210
  #total[i] <- diff_hauls[diff_hauls[,22] == combos[1,i],9] * diff_hauls[diff_hauls[,22] == combos[2,i],9]
  x <- unique_phylo[unique_phylo$Species == combos[1,i],]
  y <- unique_phylo[unique_phylo$Species == combos[2,i],]
  x_y <- rbind(x,y)
  
  for (k in 1:ncol(x_y)){ # for each combination calculate the weight value 
    ident[k] <- identical(as.vector(x_y[1,k]), as.vector(x_y[2,k])) # determine how much of phylogenetic information is the same
    weight[i] <- sum(ident == "FALSE") # vector of weights
    #mean_weight[i] <- mean(weight) #rep(mean(weight),length(weight))
    numerator[j] <- sum(total*weight) 
    count[j] <- sum(total)
    mean_weight[j] <- mean(weight)
    total_weight[j] <- sum(weight)
    weight_var[j] <- sum((weight- mean(weight))^2) 
  }
  delta <- (2*numerator)/(N_species*sub_species)
  delta_star <- numerator/(count)
  delta_plus <- (2*total_weight)/(N_species*sub_species)
  delta_var <- (2*weight_var)/(N_species*sub_species) #double check that this equation is correct
}
}
years<-1950:2019
delta<-as.data.frame(delta[1:70]) # taxonomic diversity
delta_star<-as.data.frame(delta_star[1:70]) # taxonomic distinctness
delta_plus<-as.data.frame(delta_plus[1:70]) # average taxonomic distinctness
delta_var<-as.data.frame(delta_var[1:70]) # variation in taxonomic distinctness

d_full2<-bind_cols(years,delta, delta_star,delta_plus,delta_var)
colnames(d_full2)<-c("year", "delta", "delta_star", "delta_plus","delta_var")

#write.csv(d_full, here("Data/tax_metrics_full.csv"))

tax.distinct_full2<-ggplot()+geom_line(data=subset(d_full2, year>2005), aes(year, delta_plus))+theme_classic()+ylab(expression(paste("Average\nTaxinomic\nDistinctness")))+theme(plot.margin = margin(10, 10, 10, 25))
                                                                      tax.distinct_full2

Combo plot

County specified data

library(reshape2)
library(gridExtra)

all_metrics<-full_join(richness,shannon, by="year")%>%
  full_join(simpson)%>%
  full_join(d)

all_melt<-melt(all_metrics, id="year")

ggplot()+geom_line(data=all_melt, aes(year,value))+facet_grid(rows = vars(variable), scales="free")+theme_bw()

grid.arrange(rich, diversity, evenness,tax.distinct, nrow=4)

#filtered
all_metrics2<-full_join(richness2,shannon2, by="year")%>%
  full_join(simpson2)%>%
  full_join(d2)

all_melt2<-melt(all_metrics2, id="year")

ggplot()+geom_line(data=all_melt2, aes(year,value))+facet_grid(rows = vars(variable), scales="free")+theme_bw()

grid.arrange(rich2, diversity2, evenness2,tax.distinct2, nrow=4)

Full data

library(reshape2)
library(gridExtra)

all_metrics_full<-full_join(richness_full,shannon_full, by="year")%>%
  full_join(simpson_full)%>%
  full_join(d_full)

all_melt_full<-melt(all_metrics_full, id="year")

ggplot()+geom_line(data=all_melt_full, aes(year,value))+facet_grid(rows = vars(variable), scales="free")+theme_bw()

grid.arrange(rich_full, diversity_full, evenness_full,tax.distinct_full, nrow=4)

#filtered data
all_metrics_full2<-full_join(richness_full2,shannon_full2, by="year")%>%
  full_join(simpson_full2)%>%
  full_join(d_full2)

all_melt_full2<-melt(all_metrics_full2, id="year")

ggplot()+geom_line(data=all_melt_full2, aes(year,value))+facet_grid(rows = vars(variable), scales="free")+theme_bw()

grid.arrange(rich_full2, diversity_full2, evenness_full2,tax.distinct_full2, nrow=4)


Correlation analysis

  • compare trawl and landings
library(corrplot)
library(PerformanceAnalytics)
# combine data
trawl_metrics<-read.csv(here("Data/trawl_biodiversity_update.csv"))%>%
  rename(richness_trawl=richness, shannon_trawl=shannon, simpsonE_trawl=simpsonE, delta_plus_trawl=delta_plus, year=Year)%>%
  dplyr::select(2,3,4,6,9)

# trawl_melt<-melt(trawl_metrics, id="year")
# 
# ggplot()+geom_line(data=trawl_melt, aes(year,value))+facet_grid(rows = vars(variable), scales="free")+theme_bw()


landings_metrics<-dplyr::select(all_metrics2, 1,2,3,5,8)

combo<-full_join(landings_metrics,trawl_metrics)%>%
  filter(year>2005)

#correlation test
cor<-round(cor(combo),2)
cor.table<-as.data.frame(as.table(cor))


corrplot(cor, type="upper")

chart.Correlation(cor, histogram=TRUE)

cor.table
##                Var1             Var2  Freq
## 1              year             year  1.00
## 2          richness             year -0.47
## 3           shannon             year -0.72
## 4          simpsonE             year -0.71
## 5        delta_plus             year  0.36
## 6    richness_trawl             year -0.54
## 7     shannon_trawl             year -0.62
## 8    simpsonE_trawl             year -0.77
## 9  delta_plus_trawl             year  0.13
## 10             year         richness -0.47
## 11         richness         richness  1.00
## 12          shannon         richness  0.59
## 13         simpsonE         richness -0.04
## 14       delta_plus         richness -0.12
## 15   richness_trawl         richness  0.24
## 16    shannon_trawl         richness  0.43
## 17   simpsonE_trawl         richness  0.59
## 18 delta_plus_trawl         richness  0.03
## 19             year          shannon -0.72
## 20         richness          shannon  0.59
## 21          shannon          shannon  1.00
## 22         simpsonE          shannon  0.71
## 23       delta_plus          shannon -0.43
## 24   richness_trawl          shannon  0.17
## 25    shannon_trawl          shannon  0.57
## 26   simpsonE_trawl          shannon  0.94
## 27 delta_plus_trawl          shannon -0.33
## 28             year         simpsonE -0.71
## 29         richness         simpsonE -0.04
## 30          shannon         simpsonE  0.71
## 31         simpsonE         simpsonE  1.00
## 32       delta_plus         simpsonE -0.35
## 33   richness_trawl         simpsonE  0.30
## 34    shannon_trawl         simpsonE  0.49
## 35   simpsonE_trawl         simpsonE  0.65
## 36 delta_plus_trawl         simpsonE -0.34
## 37             year       delta_plus  0.36
## 38         richness       delta_plus -0.12
## 39          shannon       delta_plus -0.43
## 40         simpsonE       delta_plus -0.35
## 41       delta_plus       delta_plus  1.00
## 42   richness_trawl       delta_plus  0.19
## 43    shannon_trawl       delta_plus -0.31
## 44   simpsonE_trawl       delta_plus -0.60
## 45 delta_plus_trawl       delta_plus -0.01
## 46             year   richness_trawl -0.54
## 47         richness   richness_trawl  0.24
## 48          shannon   richness_trawl  0.17
## 49         simpsonE   richness_trawl  0.30
## 50       delta_plus   richness_trawl  0.19
## 51   richness_trawl   richness_trawl  1.00
## 52    shannon_trawl   richness_trawl  0.63
## 53   simpsonE_trawl   richness_trawl  0.04
## 54 delta_plus_trawl   richness_trawl -0.27
## 55             year    shannon_trawl -0.62
## 56         richness    shannon_trawl  0.43
## 57          shannon    shannon_trawl  0.57
## 58         simpsonE    shannon_trawl  0.49
## 59       delta_plus    shannon_trawl -0.31
## 60   richness_trawl    shannon_trawl  0.63
## 61    shannon_trawl    shannon_trawl  1.00
## 62   simpsonE_trawl    shannon_trawl  0.49
## 63 delta_plus_trawl    shannon_trawl -0.07
## 64             year   simpsonE_trawl -0.77
## 65         richness   simpsonE_trawl  0.59
## 66          shannon   simpsonE_trawl  0.94
## 67         simpsonE   simpsonE_trawl  0.65
## 68       delta_plus   simpsonE_trawl -0.60
## 69   richness_trawl   simpsonE_trawl  0.04
## 70    shannon_trawl   simpsonE_trawl  0.49
## 71   simpsonE_trawl   simpsonE_trawl  1.00
## 72 delta_plus_trawl   simpsonE_trawl -0.16
## 73             year delta_plus_trawl  0.13
## 74         richness delta_plus_trawl  0.03
## 75          shannon delta_plus_trawl -0.33
## 76         simpsonE delta_plus_trawl -0.34
## 77       delta_plus delta_plus_trawl -0.01
## 78   richness_trawl delta_plus_trawl -0.27
## 79    shannon_trawl delta_plus_trawl -0.07
## 80   simpsonE_trawl delta_plus_trawl -0.16
## 81 delta_plus_trawl delta_plus_trawl  1.00


Regional landings diveristy

Landings assigned to survey regions based on county lines.

Trawl survey regions with counties outlined Maine counties

# add regions by county
landings_county<-landings_total
#table(landings_county$county)

landings_county$Region<-NULL
landings_county$Region[landings_county$county=="York"]<-1
landings_county$Region[landings_county$county=="Cumberland"]<-2
landings_county$Region[landings_county$county=="Sagadahoc"]<-2
landings_county$Region[landings_county$county=="Lincoln"]<-2
landings_county$Region[landings_county$county=="Knox"]<-3
landings_county$Region[landings_county$county=="Waldo"]<-3
landings_county$Region[landings_county$county=="Hancock"]<-4
landings_county$Region[landings_county$county=="Washington"]<-5

# group to east/west of Penobscot Bay
landings_county$group<-NULL
landings_county$group[landings_county$Region==1]<-"West"
landings_county$group[landings_county$Region==2]<-"West"
landings_county$group[landings_county$Region==3]<-"Penobscot"
landings_county$group[landings_county$Region==4]<-"East"
landings_county$group[landings_county$Region==5]<-"East"

landings_county<-filter(landings_county,!is.na(landings_county$group))

Species richness

richness<-group_by(landings_county, group, year)%>%
  summarise(richness=length(unique(species)))

ggplot()+ geom_line(data=richness, aes(year, richness))+
  facet_grid(rows=vars(group))+theme_bw()

Shannon-Weiner diversity

shannon<-group_by(landings_county, year,group)%>%
  mutate(total=sum(weight), prop=weight/total)%>%
  summarise(shannon=-1*(sum(prop*log(prop))))

ggplot()+geom_line(data=shannon, aes(year, shannon))+
  facet_grid(rows=vars(group))+theme_bw()

Simpson’s diversity and evenness

simpson<-group_by(landings_county, year,group,species)%>%
  summarise(species_total=sum(weight))%>% #aggregate counties to get yearly species totals for all of Maine
  group_by(year, group)%>%
  mutate(richness=length(unique(species)))%>%
  mutate(total=sum(species_total))%>%
  mutate(prop=species_total/total)%>%
  summarise(simpsonD=1/(sum(prop^2)),simpsonE=simpsonD*(1/richness))

ggplot()+geom_line(data=simpson, aes(year, simpsonE))+
  facet_grid(rows=vars(group))+theme_bw()

Average taxinomic distinctness

library(taxize)
library(purrr)
library(mgcv)
library(gridExtra)

species<-read.csv(here("Data/species_landings.csv"))%>%
  filter(scientific.name!="")%>%
  filter(scientific.name!="Fucus vesiculosus")%>%
  rename(common_name=species, Species=scientific.name)
  

diff_species<-as.vector(unique(species$Species))
tax <- classification(diff_species, db = 'itis') 
## ==  42 queries  ==============
## v  Found:  Gadus morhua
## v  Found:  Cancer irroratus
## v  Found:  Cancer borealis
## v  Found:  Brosme brosme
## v  Found:  Anguilla rostrata
## v  Found:  Glyptocephalus cynoglossus
## v  Found:  Pseudopleuronectes americanus
## v  Found:  Melanogrammus aeglefinus
## v  Found:  Urophycis tenuis
## v  Found:  Hippoglossus hippoglossus
## v  Found:  Clupea harengus
## v  Found:  Homarus americanus
## v  Found:  Lophius americanus
## v  Found:  Hippoglossoides platessoides
## v  Found:  Pollachius virens
## v  Found:  Sebastes fasciatus
## v  Found:  Placopecten magellanicus
## v  Found:  Pandalus borealis
## v  Found:  Anarhichas lupus
## v  Found:  Mytilus edulis
## v  Found:  Strongylocentrotus droebachiensis
## v  Found:  Crassostrea virginica
## v  Found:  Mercenaria mercenaria
## v  Found:  Limanda ferruginea
## v  Found:  Merluccius bilinearis
## v  Found:  Scomber scombrus
## v  Found:  Cucumaria frondosa
## v  Found:  Pomatomus saltatrix
## v  Found:  Brevoortia tyrannus
## v  Found:  Arctica islandica
## v  Found:  Ensis directus
## v  Found:  Squalus acanthias
## v  Found:  Myxine glutinosa
## v  Found:  Ostrea edulis
## v  Found:  Littorina littorea
## v  Found:  Loligo pealeii
## v  Found:  Thunnus thynnus
## v  Found:  Xiphias gladius
## v  Found:  Buccinum undatum
## v  Found:  Illex illecebrosus
## v  Found:  Thunnus obesus
## v  Found:  Spisula solidissima
## ==  Results  =================
## 
## * Total: 42 
## * Found: 42 
## * Not Found: 0
info <- matrix(NA)
expand <- matrix(NA)
specific <- matrix(NA, nrow = length(diff_species), ncol=6)


for (i in 1:length(tax)){
info <- tax[[i]][c('name','rank')]
expand <- info[info$rank == 'phylum'| info$rank == 'class'| info$rank == 'order' | info$rank == 'family' | info$rank == 'genus' | info$rank == 'species',]
specific[i,] <- as.vector(expand$name)

}
colnames(specific) <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")
phylo<-as.data.frame(specific)

merge<-left_join(species,phylo, by="Species")
  
landings_tax<-rename(landings_county, common_name=species)%>%
  full_join(merge)%>%
  filter(!is.na(Species))


#break into groups
east_species<-filter(landings_tax, group=="East")%>%
  group_by(year,group,Species)%>%
  summarise(weight=sum(weight))%>%
  mutate(indicator=cur_group_id())%>%
  left_join(phylo)
  
west_species<-filter(landings_tax, group=="West")%>%
  group_by(year,group,Species)%>%
  summarise(weight=sum(weight))%>%
  mutate(indicator=cur_group_id())%>%
  left_join(phylo)

pen_species<-filter(landings_tax, group=="Penobscot")%>%
  group_by(year,group,Species)%>%
  summarise(weight=sum(weight))%>%
  mutate(indicator=cur_group_id())%>%
  left_join(phylo)

#east
hauls <- unique(east_species$indicator)   
N_hauls <- length(hauls) # number of hauls
N_species <- NULL #N species
sub_species <- NULL # N_species-1
total <- NULL  #xixj
numerator <- NULL #wijxixj
x_y <- matrix(NA, nrow = 6, ncol = 6) 
x <- NULL
y <- NULL
ident <- NULL
weight <- NULL #wij
count <- NULL
total_weight <- NULL
mean_weight <- NULL
weight_var <- NULL
delta <- NULL
delta_star <- NULL
delta_plus <- NULL
delta_var <- NULL
weight_var <- NULL

for (j in 1:N_hauls) {  
diff_hauls <- east_species[which(east_species$indicator == j),] #subset unique hauls/functional groups
N_species[j] <- length(unique(diff_hauls$Species))# count the number of unique species in each haul (denominator)
sub_species[j] <- N_species[j]-1
diff <- unique(as.vector(diff_hauls$Species)) # name of each unique species 
combos <- combn(diff, 2) # create combinations of each species/haul (for weight calc)

phylo <- as.matrix(subset(diff_hauls, select = c(Phylum,Class,Order,Family,Genus, Species))) # extract phylogenetic information only
unique_phylo <- uniquecombs(phylo) # subset by unique species information
unique_phylo <- as.data.frame(unique_phylo)

total <- NULL  # reset the length for each haul because they will be different
weight <- NULL # reset  

for (i in 1:ncol(combos)) { # for each unique combination count the number of each species 
  total[i] <- sum(diff_hauls$Species == combos[1,i]) * sum(diff_hauls$Species == combos[2,i]) #empty vector is always length 210
  #total[i] <- diff_hauls[diff_hauls[,22] == combos[1,i],9] * diff_hauls[diff_hauls[,22] == combos[2,i],9]
  x <- unique_phylo[unique_phylo$Species == combos[1,i],]
  y <- unique_phylo[unique_phylo$Species == combos[2,i],]
  x_y <- rbind(x,y)
  
  for (k in 1:ncol(x_y)){ # for each combination calculate the weight value 
    ident[k] <- identical(as.vector(x_y[1,k]), as.vector(x_y[2,k])) # determine how much of phylogenetic information is the same
    weight[i] <- sum(ident == "FALSE") # vector of weights
    #mean_weight[i] <- mean(weight) #rep(mean(weight),length(weight))
    numerator[j] <- sum(total*weight) 
    count[j] <- sum(total)
    mean_weight[j] <- mean(weight)
    total_weight[j] <- sum(weight)
    weight_var[j] <- sum((weight- mean(weight))^2) 
  }
  delta <- (2*numerator)/(N_species*sub_species)
  delta_star <- numerator/(count)
  delta_plus <- (2*total_weight)/(N_species*sub_species)
  delta_var <- (2*weight_var)/(N_species*sub_species) #double check that this equation is correct
}
}

years<-2006:2020
delta<-as.data.frame(delta[1:15]) # taxonomic diversity
delta_star<-as.data.frame(delta_star[1:15]) # taxonomic distinctness
delta_plus<-as.data.frame(delta_plus[1:15]) # average taxonomic distinctness
delta_var<-as.data.frame(delta_var[1:15]) # variation in taxonomic distinctness

east_d<-bind_cols(years,delta, delta_star,delta_plus,delta_var)
colnames(east_d)<-c("year", "delta", "delta_star", "delta_plus","delta_var")

#write.csv(d, here("Data/tax_metrics.csv"))

east_tax.distinct<-ggplot()+geom_line(data=subset(east_d, year<2020), aes(year, delta_plus))+theme_classic()+ylab(expression(paste("East\nAverage\nTaxinomic\nDistinctness")))+theme(plot.margin = margin(10, 10, 10, 35))
                                                                        

                                                                        #west
hauls <- unique(west_species$indicator)   
N_hauls <- length(hauls) # number of hauls
N_species <- NULL #N species
sub_species <- NULL # N_species-1
total <- NULL  #xixj
numerator <- NULL #wijxixj
x_y <- matrix(NA, nrow = 6, ncol = 6) 
x <- NULL
y <- NULL
ident <- NULL
weight <- NULL #wij
count <- NULL
total_weight <- NULL
mean_weight <- NULL
weight_var <- NULL
delta <- NULL
delta_star <- NULL
delta_plus <- NULL
delta_var <- NULL
weight_var <- NULL

for (j in 1:N_hauls) {  
diff_hauls <- west_species[which(west_species$indicator == j),] #subset unique hauls/functional groups
N_species[j] <- length(unique(diff_hauls$Species))# count the number of unique species in each haul (denominator)
sub_species[j] <- N_species[j]-1
diff <- unique(as.vector(diff_hauls$Species)) # name of each unique species 
combos <- combn(diff, 2) # create combinations of each species/haul (for weight calc)

phylo <- as.matrix(subset(diff_hauls, select = c(Phylum,Class,Order,Family,Genus, Species))) # extract phylogenetic information only
unique_phylo <- uniquecombs(phylo) # subset by unique species information
unique_phylo <- as.data.frame(unique_phylo)

total <- NULL  # reset the length for each haul because they will be different
weight <- NULL # reset  

for (i in 1:ncol(combos)) { # for each unique combination count the number of each species 
  total[i] <- sum(diff_hauls$Species == combos[1,i]) * sum(diff_hauls$Species == combos[2,i]) #empty vector is always length 210
  #total[i] <- diff_hauls[diff_hauls[,22] == combos[1,i],9] * diff_hauls[diff_hauls[,22] == combos[2,i],9]
  x <- unique_phylo[unique_phylo$Species == combos[1,i],]
  y <- unique_phylo[unique_phylo$Species == combos[2,i],]
  x_y <- rbind(x,y)
  
  for (k in 1:ncol(x_y)){ # for each combination calculate the weight value 
    ident[k] <- identical(as.vector(x_y[1,k]), as.vector(x_y[2,k])) # determine how much of phylogenetic information is the same
    weight[i] <- sum(ident == "FALSE") # vector of weights
    #mean_weight[i] <- mean(weight) #rep(mean(weight),length(weight))
    numerator[j] <- sum(total*weight) 
    count[j] <- sum(total)
    mean_weight[j] <- mean(weight)
    total_weight[j] <- sum(weight)
    weight_var[j] <- sum((weight- mean(weight))^2) 
  }
  delta <- (2*numerator)/(N_species*sub_species)
  delta_star <- numerator/(count)
  delta_plus <- (2*total_weight)/(N_species*sub_species)
  delta_var <- (2*weight_var)/(N_species*sub_species) #double check that this equation is correct
}
}

years<-2006:2020
delta<-as.data.frame(delta[1:15]) # taxonomic diversity
delta_star<-as.data.frame(delta_star[1:15]) # taxonomic distinctness
delta_plus<-as.data.frame(delta_plus[1:15]) # average taxonomic distinctness
delta_var<-as.data.frame(delta_var[1:15]) # variation in taxonomic distinctness

west_d<-bind_cols(years,delta, delta_star,delta_plus,delta_var)
colnames(west_d)<-c("year", "delta", "delta_star", "delta_plus","delta_var")

#write.csv(d, here("Data/tax_metrics.csv"))

west_tax.distinct<-ggplot()+geom_line(data=subset(west_d, year<2020), aes(year, delta_plus))+theme_classic()+ylab(expression(paste("West\nAverage\nTaxinomic\nDistinctness")))+theme(plot.margin = margin(10, 10, 10, 35))
                                                                         
                                                                        
                                                                        #Penobscot
hauls <- unique(pen_species$indicator)   
N_hauls <- length(hauls) # number of hauls
N_species <- NULL #N species
sub_species <- NULL # N_species-1
total <- NULL  #xixj
numerator <- NULL #wijxixj
x_y <- matrix(NA, nrow = 6, ncol = 6) 
x <- NULL
y <- NULL
ident <- NULL
weight <- NULL #wij
count <- NULL
total_weight <- NULL
mean_weight <- NULL
weight_var <- NULL
delta <- NULL
delta_star <- NULL
delta_plus <- NULL
delta_var <- NULL
weight_var <- NULL

for (j in 1:N_hauls) {  
diff_hauls <- pen_species[which(pen_species$indicator == j),] #subset unique hauls/functional groups
N_species[j] <- length(unique(diff_hauls$Species))# count the number of unique species in each haul (denominator)
sub_species[j] <- N_species[j]-1
diff <- unique(as.vector(diff_hauls$Species)) # name of each unique species 
combos <- combn(diff, 2) # create combinations of each species/haul (for weight calc)

phylo <- as.matrix(subset(diff_hauls, select = c(Phylum,Class,Order,Family,Genus, Species))) # extract phylogenetic information only
unique_phylo <- uniquecombs(phylo) # subset by unique species information
unique_phylo <- as.data.frame(unique_phylo)

total <- NULL  # reset the length for each haul because they will be different
weight <- NULL # reset  

for (i in 1:ncol(combos)) { # for each unique combination count the number of each species 
  total[i] <- sum(diff_hauls$Species == combos[1,i]) * sum(diff_hauls$Species == combos[2,i]) #empty vector is always length 210
  #total[i] <- diff_hauls[diff_hauls[,22] == combos[1,i],9] * diff_hauls[diff_hauls[,22] == combos[2,i],9]
  x <- unique_phylo[unique_phylo$Species == combos[1,i],]
  y <- unique_phylo[unique_phylo$Species == combos[2,i],]
  x_y <- rbind(x,y)
  
  for (k in 1:ncol(x_y)){ # for each combination calculate the weight value 
    ident[k] <- identical(as.vector(x_y[1,k]), as.vector(x_y[2,k])) # determine how much of phylogenetic information is the same
    weight[i] <- sum(ident == "FALSE") # vector of weights
    #mean_weight[i] <- mean(weight) #rep(mean(weight),length(weight))
    numerator[j] <- sum(total*weight) 
    count[j] <- sum(total)
    mean_weight[j] <- mean(weight)
    total_weight[j] <- sum(weight)
    weight_var[j] <- sum((weight- mean(weight))^2) 
  }
  delta <- (2*numerator)/(N_species*sub_species)
  delta_star <- numerator/(count)
  delta_plus <- (2*total_weight)/(N_species*sub_species)
  delta_var <- (2*weight_var)/(N_species*sub_species) #double check that this equation is correct
}
}

years<-2006:2020
delta<-as.data.frame(delta[1:15]) # taxonomic diversity
delta_star<-as.data.frame(delta_star[1:15]) # taxonomic distinctness
delta_plus<-as.data.frame(delta_plus[1:15]) # average taxonomic distinctness
delta_var<-as.data.frame(delta_var[1:15]) # variation in taxonomic distinctness

pen_d<-bind_cols(years,delta, delta_star,delta_plus,delta_var)
colnames(pen_d)<-c("year", "delta", "delta_star", "delta_plus","delta_var")

#write.csv(d, here("Data/tax_metrics.csv"))

pen_tax.distinct<-ggplot()+geom_line(data=subset(pen_d, year<2020), aes(year, delta_plus))+theme_classic()+ylab(expression(paste("Penobscot\nAverage\nTaxinomic\nDistinctness")))+theme(plot.margin = margin(10, 10, 10, 35))

                                                                        grid.arrange(east_tax.distinct, west_tax.distinct, pen_tax.distinct, nrow=3)                                                                        

East of Penobscot Bay

#combo plots

#east
r<-ggplot()+geom_line(data=subset(richness, group=="East"),aes(year, richness))+theme_classic()+ylab(expression(paste("Species\nRichness")))+theme(plot.margin = margin(10, 10, 10, 20))

s<-ggplot()+geom_line(data=subset(shannon, group=="East"),aes(year, shannon))+theme_classic()+ylab(expression(paste("Shannon-Weiner\nDiversity")))+theme(plot.margin = margin(10, 10, 10, 20))

e<-ggplot()+geom_line(data=subset(simpson, group=="East"),aes(year, simpsonE))+theme_classic()+ylab(expression(paste("Simpson's\nEvenness")))+theme(plot.margin = margin(10, 10, 10, 20))

t<-ggplot()+geom_line(data = east_d, aes(year, delta_plus))+theme_classic()+ylab(expression(paste("Average\nTaxinomic\nDistinctness")))+theme(plot.margin = margin(10, 10, 10, 25))

grid.arrange(r,s,e,t, nrow=4)

West of Penobscot Bay

#west
r<-ggplot()+geom_line(data=subset(richness, group=="West"),aes(year, richness))+theme_classic()+ylab(expression(paste("Species\nRichness")))+theme(plot.margin = margin(10, 10, 10, 20))

s<-ggplot()+geom_line(data=subset(shannon, group=="West"),aes(year, shannon))+theme_classic()+ylab(expression(paste("Shannon-Weiner\nDiversity")))+theme(plot.margin = margin(10, 10, 10, 20))

e<-ggplot()+geom_line(data=subset(simpson, group=="West"),aes(year, simpsonE))+theme_classic()+ ylab(expression(paste("Simpson's\nEvenness")))+theme(plot.margin = margin(10, 10, 10, 20))

t<-ggplot()+geom_line(data = west_d, aes(year, delta_plus))+theme_classic()+ylab(expression(paste("Average\nTaxinomic\nDistinctness")))+theme(plot.margin = margin(10, 10, 10, 25))

grid.arrange(r,s,e,t, nrow=4)

Penobscot Bay

#pen bay
r<-ggplot()+geom_line(data=subset(richness, group=="Penobscot"),aes(year, richness))+theme_classic()+ylab(expression(paste("Species\nRichness")))+theme(plot.margin = margin(10, 10, 10, 20))

s<-ggplot()+geom_line(data=subset(shannon, group=="Penobscot"),aes(year, shannon))+theme_classic()+ylab(expression(paste("Shannon-Weiner\nDiversity")))+theme(plot.margin = margin(10, 10, 10, 20))

e<-ggplot()+geom_line(data=subset(simpson, group=="Penobscot"),aes(year, simpsonE))+theme_classic()+ ylab(expression(paste("Simpson's\nEvenness")))+theme(plot.margin = margin(10, 10, 10, 20))

t<-ggplot()+geom_line(data = pen_d, aes(year, delta_plus))+theme_classic()+ylab(expression(paste("Average\nTaxinomic\nDistinctness")))+theme(plot.margin = margin(10, 10, 10, 25))

grid.arrange(r,s,e,t, nrow=4)

Functional Groups

County specified data

groups<-read.csv(here("Data/species_groups.csv"))
landings_groups<-mutate(landings_total,COMMON_NAME=tolower(species))%>%
  left_join(groups, by="COMMON_NAME")

landings_groups$functional_group[landings_groups$functional_group==""]<-"undefined"
landings_groups$functional_group[is.na(landings_groups$functional_group)]<-"undefined"

#average weight of each group/ year
year_group<-group_by(landings_groups,year,county, functional_group)%>%
 summarise(weight_avg=mean(weight, na.rm=TRUE))%>%
  group_by(year, functional_group)%>%
  summarise(average_weight=mean(weight_avg, na.rm=TRUE))

#proportion of groups per county/year
year_group_prop<-group_by(landings_groups,year,county)%>%
 mutate(weight_total=sum(weight))%>%
  mutate(prop=weight/weight_total)%>%
  group_by(year, functional_group)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))
  
ggplot(year_group)+
  geom_line(aes(x=year, y=average_weight, color=functional_group))+
  scale_color_colorblind()+ theme_classic()

ggplot(year_group_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=functional_group), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()+ ylab("Proportion of biomass (kg)")+theme(text=element_text(size=18))

Benthivore

benthivore<-filter(landings_groups, functional_group=="benthivore")
group_benthivore<-group_by(benthivore,year, COMMON_NAME)%>%
 summarise(weight_avg=mean(weight, na.rm=TRUE))

benthivore_prop<-group_by(benthivore,year,county)%>%
 mutate(weight_total=sum(weight))%>%
  mutate(prop=weight/weight_total)%>%
  group_by(year, COMMON_NAME)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))

ggplot(group_benthivore)+
  geom_line(aes(x=year, y=weight_avg, color=COMMON_NAME))+
  scale_color_colorblind()+ theme_classic()

ggplot(benthivore_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()


Benthos

benthos<-filter(landings_groups, functional_group=="benthos")
group_benthos<-group_by(benthos,year, COMMON_NAME)%>%
 summarise(weight_avg=mean(weight, na.rm=TRUE))

benthos_prop<-group_by(benthos,year,county)%>%
 mutate(weight_total=sum(weight))%>%
  mutate(prop=weight/weight_total)%>%
  group_by(year, COMMON_NAME)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))

ggplot(group_benthos)+
  geom_line(aes(x=year, y=weight_avg, color=COMMON_NAME))+
  scale_color_colorblind()+ theme_classic()

ggplot(benthos_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()


Piscivore

piscivore<-filter(landings_groups, functional_group=="piscivore")

order<-group_by(piscivore, COMMON_NAME)%>%
  summarise(avg_weight=mean(weight, na.rm=TRUE))%>%
  arrange(desc(avg_weight))

top_10<-order[1:8,1]
piscivore_top<-inner_join(piscivore, top_10)

group_piscivore<-group_by(piscivore_top,year, COMMON_NAME)%>%
 summarise(weight_avg=mean(weight, na.rm=TRUE))

piscivore_prop<-group_by(piscivore_top,year,county)%>%
 mutate(weight_total=sum(weight))%>%
  mutate(prop=weight/weight_total)%>%
  group_by(year, COMMON_NAME)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))

ggplot(group_piscivore)+
  geom_line(aes(x=year, y=weight_avg, color=COMMON_NAME))+
  scale_color_colorblind()+ theme_classic()

ggplot(piscivore_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()


Planktivore

planktivore<-filter(landings_groups, functional_group=="planktivore")
group_planktivore<-group_by(planktivore,year, COMMON_NAME)%>%
 summarise(weight_avg=mean(weight, na.rm=TRUE))

planktivore_prop<-group_by(planktivore,year,county)%>%
 mutate(weight_total=sum(weight))%>%
  mutate(prop=weight/weight_total)%>%
  group_by(year, COMMON_NAME)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))

ggplot(group_planktivore)+
  geom_line(aes(x=year, y=weight_avg, color=COMMON_NAME))+
  scale_color_colorblind()+ theme_classic()

ggplot(planktivore_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()


Undefined

undefined<-filter(landings_groups, functional_group=="undefined")

order<-group_by(undefined, COMMON_NAME)%>%
  summarise(avg_weight=mean(weight, na.rm=TRUE))%>%
  arrange(desc(avg_weight))

top_10<-order[1:8,1]
undefined_top<-inner_join(undefined, top_10) 

group_undefined<-group_by(undefined_top,year, COMMON_NAME)%>%
 summarise(weight_avg=mean(weight, na.rm=TRUE))

undefined_prop<-group_by(undefined_top,year,county)%>%
 mutate(weight_total=sum(weight))%>%
  mutate(prop=weight/weight_total)%>%
  group_by(year, COMMON_NAME)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))

ggplot(group_undefined)+
  geom_line(aes(x=year, y=weight_avg, color=COMMON_NAME))+
  scale_color_colorblind()+ theme_classic()

ggplot(undefined_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()

Full data

groups<-read.csv(here("Data/species_groups.csv"))
landings_groups_full<-mutate(landings_full,COMMON_NAME=tolower(species))%>%
  left_join(groups, by="COMMON_NAME")

#fill in missing functional groups for new species
landings_missing<-filter(landings_groups_full, is.na(functional_group))

#landings_groups$functional_group[landings_groups$functional_group==""]<-""

#create undefined group
landings_groups_full$functional_group[landings_groups_full$functional_group==""]<-"undefined"
landings_groups_full$functional_group[is.na(landings_groups_full$functional_group)]<-"undefined"

#average weight of each group/ year
year_group_full<-group_by(landings_groups_full,year, functional_group)%>%
 summarise(weight_avg=mean(total_weight, na.rm=TRUE))%>%
  group_by(year, functional_group)%>%
  summarise(average_weight=mean(weight_avg, na.rm=TRUE))

#proportion of groups per year
year_group_prop_full<-group_by(landings_groups_full,year)%>%
 mutate(weight_total=sum(total_weight))%>%
  mutate(prop=total_weight/weight_total)%>%
  group_by(year, functional_group)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))
  
ggplot(year_group_full)+
  geom_line(aes(x=year, y=average_weight, color=functional_group))+
  scale_color_colorblind()+ theme_classic()

ggplot(year_group_prop_full)+
  geom_bar(aes(x=year, y=mean_prop, fill=functional_group), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()+ ylab("Proportion of biomass (kg)")+theme(text=element_text(size=18))

ggplot(subset(year_group_prop_full, year>2005))+
  geom_bar(aes(x=year, y=mean_prop, fill=functional_group), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()+ ylab("Proportion of biomass (kg)")+theme(text=element_text(size=18))

Benthivore

benthivore<-filter(landings_groups_full, functional_group=="benthivore")
group_benthivore<-group_by(benthivore,year, COMMON_NAME)%>%
 summarise(weight_avg=mean(total_weight, na.rm=TRUE))

benthivore_prop<-group_by(benthivore,year)%>%
 mutate(weight_total=sum(total_weight))%>%
  mutate(prop=total_weight/weight_total)%>%
  group_by(year, COMMON_NAME)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))

ggplot(group_benthivore)+
  geom_line(aes(x=year, y=weight_avg, color=COMMON_NAME))+
  scale_color_colorblind()+ theme_classic()

ggplot(benthivore_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()

ggplot(subset(benthivore_prop, year>2005))+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()


Benthos

benthos<-filter(landings_groups_full, functional_group=="benthos")
group_benthos<-group_by(benthos,year, COMMON_NAME)%>%
 summarise(weight_avg=mean(total_weight, na.rm=TRUE))

benthos_prop<-group_by(benthos,year)%>%
 mutate(weight_total=sum(total_weight))%>%
  mutate(prop=total_weight/weight_total)%>%
  group_by(year, COMMON_NAME)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))

ggplot(group_benthos)+
  geom_line(aes(x=year, y=weight_avg, color=COMMON_NAME))+
  scale_color_colorblind()+ theme_classic()

ggplot(benthos_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()

ggplot(subset(benthos_prop, year>2005))+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()


Piscivore

piscivore<-filter(landings_groups_full, functional_group=="piscivore")

order<-group_by(piscivore, COMMON_NAME)%>%
  summarise(avg_weight=mean(total_weight, na.rm=TRUE))%>%
  arrange(desc(avg_weight))

top_10<-order[1:8,1]
piscivore_top<-inner_join(piscivore, top_10)

group_piscivore<-group_by(piscivore_top,year, COMMON_NAME)%>%
 summarise(weight_avg=mean(total_weight, na.rm=TRUE))

piscivore_prop<-group_by(piscivore_top,year)%>%
 mutate(weight_total=sum(total_weight))%>%
  mutate(prop=total_weight/weight_total)%>%
  group_by(year, COMMON_NAME)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))

ggplot(group_piscivore)+
  geom_line(aes(x=year, y=weight_avg, color=COMMON_NAME))+
  scale_color_colorblind()+ theme_classic()

ggplot(piscivore_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()

ggplot(subset(piscivore_prop, year>2005))+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()


Planktivore

planktivore<-filter(landings_groups_full, functional_group=="planktivore")
group_planktivore<-group_by(planktivore,year, COMMON_NAME)%>%
 summarise(weight_avg=mean(total_weight, na.rm=TRUE))

planktivore_prop<-group_by(planktivore,year)%>%
 mutate(weight_total=sum(total_weight))%>%
  mutate(prop=total_weight/weight_total)%>%
  group_by(year, COMMON_NAME)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))

ggplot(group_planktivore)+
  geom_line(aes(x=year, y=weight_avg, color=COMMON_NAME))+
  scale_color_colorblind()+ theme_classic()

ggplot(planktivore_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()

ggplot(subset(planktivore_prop, year>2005))+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()


Undefined

undefined<-filter(landings_groups_full, functional_group=="undefined")

order<-group_by(undefined, COMMON_NAME)%>%
  summarise(avg_weight=mean(total_weight, na.rm=TRUE))%>%
  arrange(desc(avg_weight))

top_10<-order[1:8,1]
undefined_top<-inner_join(undefined, top_10) 

group_undefined<-group_by(undefined_top,year, COMMON_NAME)%>%
 summarise(weight_avg=mean(total_weight, na.rm=TRUE))

undefined_prop<-group_by(undefined_top,year)%>%
 mutate(weight_total=sum(total_weight))%>%
  mutate(prop=total_weight/weight_total)%>%
  group_by(year, COMMON_NAME)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))

ggplot(group_undefined)+
  geom_line(aes(x=year, y=weight_avg, color=COMMON_NAME))+
  scale_color_colorblind()+ theme_classic()

ggplot(undefined_prop)+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()

ggplot(subset(undefined_prop, year>2005))+
  geom_bar(aes(x=year, y=mean_prop, fill=COMMON_NAME), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()

Regional functional groups

West

landings_groups$Region<-NULL
landings_groups$Region[landings_groups$county=="York"]<-1
landings_groups$Region[landings_groups$county=="Cumberland"]<-2
landings_groups$Region[landings_groups$county=="Sagadahoc"]<-2
landings_groups$Region[landings_groups$county=="Lincoln"]<-2
landings_groups$Region[landings_groups$county=="Knox"]<-3
landings_groups$Region[landings_groups$county=="Waldo"]<-3
landings_groups$Region[landings_groups$county=="Hancock"]<-4
landings_groups$Region[landings_groups$county=="Washington"]<-5

# group to east/west of Penobscot Bay
landings_groups$group<-NULL
landings_groups$group[landings_groups$Region==1]<-"West"
landings_groups$group[landings_groups$Region==2]<-"West"
landings_groups$group[landings_groups$Region==3]<-"Penobscot"
landings_groups$group[landings_groups$Region==4]<-"East"
landings_groups$group[landings_groups$Region==5]<-"East"

west_groups<-filter(landings_groups, group=="West")

#average weight of each group/ year
year_group_west<-group_by(west_groups,year,group, functional_group)%>%
 summarise(weight_avg=mean(weight, na.rm=TRUE))%>%
  group_by(year, functional_group)%>%
  summarise(average_weight=mean(weight_avg, na.rm=TRUE))

#proportion of groups per county/year
year_group_prop_west<-group_by(west_groups,year,county)%>%
 mutate(weight_total=sum(weight))%>%
  mutate(prop=weight/weight_total)%>%
  group_by(year, functional_group)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))
  

ggplot(year_group_prop_west)+
  geom_bar(aes(x=year, y=mean_prop, fill=functional_group), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()+ ylab("Proportion of biomass (kg)")+theme(text=element_text(size=18))

Penobscot

pen_groups<-filter(landings_groups, group=="Penobscot")

#average weight of each group/ year
year_group_pen<-group_by(pen_groups,year,group, functional_group)%>%
 summarise(weight_avg=mean(weight, na.rm=TRUE))%>%
  group_by(year, functional_group)%>%
  summarise(average_weight=mean(weight_avg, na.rm=TRUE))

#proportion of groups per county/year
year_group_prop_pen<-group_by(pen_groups,year,county)%>%
 mutate(weight_total=sum(weight))%>%
  mutate(prop=weight/weight_total)%>%
  group_by(year, functional_group)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))
  

ggplot(year_group_prop_pen)+
  geom_bar(aes(x=year, y=mean_prop, fill=functional_group), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()+ ylab("Proportion of biomass (kg)")+theme(text=element_text(size=18))

East

east_groups<-filter(landings_groups, group=="East")

#average weight of each group/ year
year_group_east<-group_by(east_groups,year,group, functional_group)%>%
 summarise(weight_avg=mean(weight, na.rm=TRUE))%>%
  group_by(year, functional_group)%>%
  summarise(average_weight=mean(weight_avg, na.rm=TRUE))

#proportion of groups per county/year
year_group_prop_east<-group_by(east_groups,year,county)%>%
 mutate(weight_total=sum(weight))%>%
  mutate(prop=weight/weight_total)%>%
  group_by(year, functional_group)%>%
  summarise(mean_prop=mean(prop, na.rm=TRUE))
  

ggplot(year_group_prop_east)+
  geom_bar(aes(x=year, y=mean_prop, fill=functional_group), position="fill", stat = "identity")+
  scale_fill_colorblind(name="Functional Group")+ theme_classic()+ ylab("Proportion of biomass (kg)")+theme(text=element_text(size=18))