Import data

Read the data from the two .csv data sheets.

import math
import pandas as pd

# Load the two clustered-proportion tables. dtype=str asks pandas to keep
# every column as text; numeric conversion happens later on the R side.
df = pd.read_csv(
    '~/Desktop/clustering/cluster/results/clustered/prop_raw_grouped_rotate.csv',
    dtype=str,
)
df2 = pd.read_csv(
    '~/Desktop/clustering/cluster/results/clustered/prop_raw_grouped.csv',
    dtype=str,
)
library(reticulate)
library(ggpubr)
## Loading required package: ggplot2
library(ggsci)
library(ggplot2)
library(gridExtra)
library(grid)
# Make the ggpubr publication theme the default for subsequent plots.
theme_set(theme_pubr())

# Bring the first pandas table across from Python and coerce the three
# corpus columns to numeric (they were read with dtype = str).
data <- py$df
data[c("CM", "LN", "WoS")] <- lapply(
  data[c("CM", "LN", "WoS")],
  as.numeric
)
data

# Same treatment for the second table: Prop and Year become numeric.
data2 <- py$df2
data2[c("Prop", "Year")] <- lapply(
  data2[c("Prop", "Year")],
  as.numeric
)
data2

Cluster Groups (Group Name : Cluster No.):

Time vs. Cluster Group

Regression fitting (loess and cubic B-spline smoothers)

# For each corpus, plot Prop against Year coloured by cluster group, once
# with a loess smoother and once with a spline-based linear model, and show
# the two panels side by side under a title naming the corpus.
for (corpus in c("CM", "LN", "WoS")) {
  # Both panels use the same rows and the same scatter layer.
  corpus_rows <- data2[data2$Corpus == corpus, ]
  scatter <- ggplot(
    corpus_rows,
    aes(x = Year, y = Prop, color = Cluster_Group)
  ) +
    geom_point(alpha = 0.7)

  # Left panel: loess smoother.
  p1 <- scatter +
    stat_smooth(method = "loess", alpha = 0.1) +
    labs(x = "Time (Year)", y = "Prop", title = "Loess") +
    theme_minimal() +
    scale_color_npg()

  # Right panel: lm fitted on a B-spline basis with df = 3.
  p2 <- scatter +
    stat_smooth(
      method = lm,
      formula = y ~ splines::bs(x, df = 3),
      alpha = 0.1
    ) +
    labs(x = "Time (Year)", y = "Prop", title = "Three-degree spline") +
    theme_minimal() +
    scale_color_npg()

  # Corpus name as a bold grey heading above the two panels.
  corpus_heading <- textGrob(
    corpus,
    gp = gpar(fontsize = 20, col = "grey", fontface = "bold")
  )
  grid.arrange(p1, p2, ncol = 2, widths = c(1, 1), top = corpus_heading)
}
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

Corpus vs. Corpus

Regression fitting (loess smoother)

# CM against WoS, coloured by cluster group, with a loess smoother.
ggplot(data = data, mapping = aes(x = WoS, y = CM, colour = Cluster_Group)) +
  geom_point(alpha = 0.7) +
  stat_smooth(method = "loess", alpha = 0.1) +
  scale_color_npg() +
  theme_minimal() +
  labs(title = "Loess", x = "WoS", y = "CM")
## `geom_smooth()` using formula 'y ~ x'

# LN against WoS, coloured by cluster group, with a loess smoother.
ggplot(data = data, mapping = aes(x = WoS, y = LN, colour = Cluster_Group)) +
  geom_point(alpha = 0.7) +
  stat_smooth(method = "loess", alpha = 0.1) +
  scale_color_npg() +
  theme_minimal() +
  labs(title = "Loess", x = "WoS", y = "LN")
## `geom_smooth()` using formula 'y ~ x'

# LN against CM, coloured by cluster group, with a loess smoother.
ggplot(data = data, mapping = aes(x = CM, y = LN, colour = Cluster_Group)) +
  geom_point(alpha = 0.7) +
  stat_smooth(method = "loess", alpha = 0.1) +
  scale_color_npg() +
  theme_minimal() +
  labs(title = "Loess", x = "CM", y = "LN")
## `geom_smooth()` using formula 'y ~ x'