# OneCompiler

# Collocations_Establish_1

# 134
 library(data.table)
library(dplyr)
library(anytime)
library(tidytext)
library(textclean)
library(stopwords)
library(udpipe)
library(lubridate)
library(ngram)

# Infix complement of `%in%`: TRUE for every element of `x` that does not
# occur in `table`.
`%nin%` <- function(x, table) {
  !(x %in% table)
}

# Full paths of everything in the outputs directory; the n-gram sections of
# this script pick their input files out of this vector by name pattern.
fls <- list.files(path = './outputs/', full.names = TRUE)

# Bigrams ----
# Combine the `df_bigram` table stored in every file of `fls` whose path
# contains 'bigram_' into one table (`dat_bi`) and save it.
# Rows are kept only when `first_upos` is one of AUX/VERB/NOUN/ADJ/ADV and
# `last_upos` is neither PUNCT nor X.
fls_bi <- fls[grep('bigram_', fls)]

# Fail early with a clear message instead of an "object not found" at save().
if (length(fls_bi) == 0) {
  stop("No files matching 'bigram_' were found in `fls`", call. = FALSE)
}

# Make sure the output directory exists before doing the expensive work.
dir.create('./combinedFiles', showWarnings = FALSE, recursive = TRUE)

# One slot per input file; binding once at the end avoids re-copying the
# growing table on every iteration.
bi_parts <- vector('list', length(fls_bi))

for (k in seq_along(fls_bi)) {

  print(paste(k, '/', length(fls_bi)))

  # Load into a scratch environment so objects stored in the file cannot
  # overwrite script variables, and so a file lacking `df_bigram` cannot
  # silently reuse the table loaded from the previous file.
  loaded <- new.env()
  load(fls_bi[k], envir = loaded)
  if (!exists('df_bigram', envir = loaded, inherits = FALSE)) {
    stop("File '", fls_bi[k], "' does not contain an object named 'df_bigram'",
         call. = FALSE)
  }

  bi_parts[[k]] <- loaded[['df_bigram']] %>%
    filter(first_upos %in% c('AUX', 'VERB', 'NOUN', 'ADJ', 'ADV')) %>%
    filter(last_upos %nin% c('PUNCT', 'X'))
}

dat_bi <- do.call(rbind, bi_parts)

save(dat_bi, file = './combinedFiles/dat_bigram.RData')

# Trigrams ----
# Combine the `df_trigram` table stored in every file of `fls` whose path
# contains 'trigram_' into one table (`dat_tri`) and save it.
# Rows are kept only when `first_upos` is one of AUX/VERB/NOUN/ADJ/ADV and
# neither `last_upos` nor `mid_upos` is PUNCT or X.
fls_tri <- fls[grep('trigram_', fls)]

# Fail early with a clear message instead of an "object not found" at save().
if (length(fls_tri) == 0) {
  stop("No files matching 'trigram_' were found in `fls`", call. = FALSE)
}

# Make sure the output directory exists before doing the expensive work.
dir.create('./combinedFiles', showWarnings = FALSE, recursive = TRUE)

# One slot per input file; binding once at the end avoids re-copying the
# growing table on every iteration.
tri_parts <- vector('list', length(fls_tri))

for (k in seq_along(fls_tri)) {

  print(paste(k, '/', length(fls_tri)))

  # Load into a scratch environment so objects stored in the file cannot
  # overwrite script variables, and so a file lacking `df_trigram` cannot
  # silently reuse the table loaded from the previous file.
  loaded <- new.env()
  load(fls_tri[k], envir = loaded)
  if (!exists('df_trigram', envir = loaded, inherits = FALSE)) {
    stop("File '", fls_tri[k], "' does not contain an object named 'df_trigram'",
         call. = FALSE)
  }

  tri_parts[[k]] <- loaded[['df_trigram']] %>%
    filter(first_upos %in% c('AUX', 'VERB', 'NOUN', 'ADJ', 'ADV')) %>%
    filter(last_upos %nin% c('PUNCT', 'X')) %>%
    filter(mid_upos %nin% c('PUNCT', 'X'))
}

dat_tri <- do.call(rbind, tri_parts)

save(dat_tri, file = './combinedFiles/dat_trigram.RData')

# Fourgrams ----
# Combine the `df_fourgram` table stored in every file of `fls` whose path
# contains 'fourgram_' into one table (`dat_four`) and save it.
# Rows are kept only when `first_upos` is one of AUX/VERB/NOUN/ADJ/ADV and
# none of `last_upos`, `mid_upos`, `pen_upos` is PUNCT or X.
fls_four <- fls[grep('fourgram_', fls)]

# Fail early with a clear message instead of an "object not found" at save().
if (length(fls_four) == 0) {
  stop("No files matching 'fourgram_' were found in `fls`", call. = FALSE)
}

# Make sure the output directory exists before doing the expensive work.
dir.create('./combinedFiles', showWarnings = FALSE, recursive = TRUE)

# One slot per input file; binding once at the end avoids re-copying the
# growing table on every iteration.
four_parts <- vector('list', length(fls_four))

for (k in seq_along(fls_four)) {

  print(paste(k, '/', length(fls_four)))

  # Load into a scratch environment so objects stored in the file cannot
  # overwrite script variables, and so a file lacking `df_fourgram` cannot
  # silently reuse the table loaded from the previous file.
  loaded <- new.env()
  load(fls_four[k], envir = loaded)
  if (!exists('df_fourgram', envir = loaded, inherits = FALSE)) {
    stop("File '", fls_four[k], "' does not contain an object named 'df_fourgram'",
         call. = FALSE)
  }

  four_parts[[k]] <- loaded[['df_fourgram']] %>%
    filter(first_upos %in% c('AUX', 'VERB', 'NOUN', 'ADJ', 'ADV')) %>%
    filter(last_upos %nin% c('PUNCT', 'X')) %>%
    filter(mid_upos %nin% c('PUNCT', 'X')) %>%
    filter(pen_upos %nin% c('PUNCT', 'X'))
}

dat_four <- do.call(rbind, four_parts)

save(dat_four, file = './combinedFiles/dat_fourgram.RData')