Social Media Example

library(MASS)
library(mixtools)
library(igraph)
library(iGraphMatch)
library(class)
library(factoextra)
library(tree)
library(randomForest)
library(caret)
library(e1071)
library(mclust)
library(irlba)
library(ggplot2)
library(tidyverse)
library(socviz)
library(Matrix)
library(flexclust)


ase <- function(A, dim) {
  require(irlba)
  
  #diag(A) <- rowSums(A) / (nrow(A)-1) # diagaug line
  
  if(nrow(A) >= 400){
    A.svd <- irlba(A, nu = dim, nv = dim)
    A.svd.values <- A.svd$d[1:dim]
    A.svd.vectors <- A.svd$v[,1:dim,drop=F]
    if(dim == 1)
      A.coords <- sqrt(A.svd.values) * A.svd.vectors
    else
      A.coords <- A.svd.vectors %*% diag(sqrt(A.svd.values))
  } else{
    A.svd <- svd(A)
    A.svd.values <- A.svd$d[1:dim]
    if(dim == 1)
      A.coords <- matrix(A.svd$v[,1,drop=F] * sqrt(A.svd$d[1]),ncol=dim)
    else
      A.coords <- A.svd$v[,1:dim] %*% diag(sqrt(A.svd$d[1:dim]))
  }
  
  return(list(X=A.coords,D=A.svd.values))
}

make_Omni<- function(A,w)
{
  n<-dim(A[[1]])[1]
  m<-length(A)
  ind<-list()
  for(i in 1:m){
    ind[[i]]<-c( (n*(i-1)+1): (n*i))
  }
  O<-matrix(0,n*m,n*m)
  for(i in 1:m){ 
    for(j in 1:m){
      O[ ind[[i]] , ind[[j]] ]<-as.matrix((w[i]*A[[i]]+w[j]*A[[j]])/(w[i]+w[j]))
    }}
  for(i in 1:m){O[ ind[[i]] , ind[[i]] ]<-as.matrix(A[[i]])}
  return(O)
}

procrustes<-function(X,Y, type = "I"){
  if(type == "C"){
    X <- X/norm(X, type = "F")*sqrt(nrow(X))
    Y <- Y/norm(Y, type = "F")*sqrt(nrow(Y))
  }
  if(type == "D"){
    tX <- rowSums(X^2)
    tX[tX <= 1e-15] <- 1
    tY <- rowSums(Y^2)
    tY[tY <= 1e-15] <- 1
    X <- X/sqrt(tX)
    Y <- Y/sqrt(tY)
  }
  
  tmp <- t(X) %*% Y
  tmp.svd <- svd(tmp)
  W <- tmp.svd$u %*% t(tmp.svd$v)
  return(list(error = norm(X%*%W - Y, type = "F"), W = W))
}

buildOmni <- function(Alist)
{
  # elements of A are assumed symmetric!
  require(Matrix)
  
  m <- length(Alist)
  nvec <- sapply(Alist, nrow)
  nsum <- c(0, cumsum(nvec))
  
  omni <- as.matrix(bdiag(Alist))
  for (i in 1:(m-1)) {
    irng <- (nsum[i]+1):nsum[i+1]
    for (j in (i+1):m) {
      jrng <- (nsum[j]+1):nsum[j+1]
      omni[irng,jrng] <- (as.matrix(Alist[[i]]) + as.matrix(Alist[[j]]))
    }
  }
  omni <- (omni + t(omni)) / 2
  return(omni)
}

getElbows <- function(dat, n = 3, threshold = FALSE, plot = F) {
  if (is.matrix(dat))
    d <- sort(apply(dat,2,sd), decreasing=T)
  else
    d <- sort(dat,decreasing=TRUE)
  
  if (!is.logical(threshold))
    d <- d[d > threshold]
  
  p <- length(d)
  if (p == 0)
    stop(paste("d must have elements that are larger than the threshold ",
               threshold), "!", sep="")
  
  lq <- rep(0.0, p)                     # log likelihood, function of q
  for (q in 1:p) {
    mu1 <- mean(d[1:q])
    mu2 <- mean(d[-(1:q)])              # = NaN when q = p
    sigma2 <- (sum((d[1:q] - mu1)^2) + sum((d[-(1:q)] - mu2)^2)) / (p - 1 - (q < p))
    lq[q] <- sum( dnorm(  d[1:q ], mu1, sqrt(sigma2), log=TRUE) ) +
      sum( dnorm(d[-(1:q)], mu2, sqrt(sigma2), log=TRUE) )
  }
  
  q <- which.max(lq)
  if (n > 1 && q < p) {
    q <- c(q, q + getElbows(d[(q+1):p], n=n-1, plot=FALSE))
  }
  
  if (plot==TRUE) {
    if (is.matrix(dat)) {
      sdv <- d # apply(dat,2,sd)
      plot(sdv,type="b",xlab="dim",ylab="stdev")
      points(q,sdv[q],col=2,pch=19)
    } else {
      plot(dat, type="b")
      points(q,dat[q],col=2,pch=19)
    }
  }
  
  return(q)
}
set.seed(123)

TS2 <- function(A,B){
  return(norm(A-B, "F")^2)
}

derange<-function(n){
  # returns a derangement of 1:n
  c=0
  while( c==0 ){d<-sample(n);
  if(sum(d==1:n)==0){c=1}}
  return(d)
}


social <- read.csv("~/Documents/social.csv", header=FALSE)
ind_tw <- which(social[,3]=="tw")
ind_ff <- which(social[,3]=="ff")
ind_yt <- which(social[,3]=="yt")

social_tw <- social[ind_tw, c(1:2) ]
g_tw <- graph_from_edgelist(cbind(social_tw$V1,social_tw$V2) , directed=FALSE)
v_tw <- names(V(g_tw))

social_ff <- social[ind_ff, c(1:2) ]
g_ff <- graph_from_edgelist(cbind(social_ff$V1,social_ff$V2), directed=FALSE)
v_ff <- names(V(g_ff))

social_yt <- social[ind_yt, c(1:2) ]
g_yt <- graph_from_edgelist(cbind(social_yt$V1,social_yt$V2), directed=FALSE)
v_yt <- names(V(g_yt))

v_co <- intersect(intersect(v_tw, v_ff), v_yt)
n <- length(v_co)
ind1<-rep(0,n)
ind2<-rep(0,n)
ind3<-rep(0,n)
for(i in 1:n){
  ind1[i] <- which(v_yt == v_co[i])
  ind2[i] <- which(v_ff == v_co[i])
  ind3[i] <- which(v_tw == v_co[i])
}

A <- g_yt[]
A <- A[ind1,ind1]

B <- g_ff[]
B <- B[ind2,ind2]

C <- g_tw[]
C <- C[ind3,ind3]

diag(A) <- 0
diag(B) <- 0
diag(C) <- 0
a<-1
b<-1
c<-1
while((a+b+c) > 0){
zA <- which( as.numeric(rowSums(A)) == 0)
zB <- which( as.numeric(rowSums(B)) == 0)
zC <- which( as.numeric(rowSums(C)) == 0)
zt <- union(union(zA, zB), zC)
a<- sum(rowSums(A[-zt,-zt])==0)
b<- sum(rowSums(B[-zt,-zt])==0)
c<- sum(rowSums(C[-zt,-zt])==0)
A <- A[-zt,-zt]
B <- B[-zt,-zt]
C <- C[-zt,-zt]}

ee<-irlba(A,30)
da1<-getElbows(ee$d)[2]

ee<-irlba(B,30)
da2<-getElbows(ee$d)[2]

ee<-irlba(C,30)
db1<-getElbows(ee$d)[2]

d <- max(da1, da2, db1)
X1 <- ase(B,d)$X
X2 <- ase(A,d)$X
Y1 <- ase(C,d)$X


nn<-dim(A)[1]


PPP2C <- matrix(0,6,6)
#PPP2a_un <- matrix(0,6,6)
#PPP2n_un <- matrix(0,6,6)
for(eee in 1:50){
print(eee)
perm<-sample(nn)
X1<-X1[perm,]
X2<-X2[perm,]
Y1<-Y1[perm,]

k <- c(25, 50, 100, 150, 200, 250)
PP <- list()
for(ii in 1:6){
  if(k[ii] != 0){
  pp <- derange(k[ii])
  pp <- c(pp,(k[ii]+1):nn)
  PP[[ii]] <- pp}else{PP[[ii]]<-c(1:nn)}
}

crit_ts2 <- rep(0,6)

for(iii in 1:6){
  t2 <- rep(0,200)
  for(j in 1:200){
    # supress warnings of <0 and >1 entries in X1%*%t(X1)
     suppressWarnings({ AA1 <- sample_dot_product(t(X1),directed=FALSE)[]})
      XX1<-ase(AA1,d)$X
      Phat1<-XX1%*%t(XX1)
      
      # supress warnings of <0 and >1 entries in X1%*%t(X1)
     suppressWarnings({AA2 <- sample_dot_product(t(X1),directed=FALSE)[]})
      XX2<-ase(AA2,d)$X
      Phat2<-XX2%*%t(XX2)
      Phat2shuff<-Phat2[PP[[iii]], PP[[iii]]]
      
      t2[j]<-TS2(Phat1,Phat2shuff)
      #Mk<-c((k[iii]+1):nn)
      #if(k[iii]!=0){
      #unshuffperm <- gm(Phat1, Phat2shuff, seeds = Mk)
      #t2unshuff[j]<-norm(as.matrix(Phat1 - Phat2shuff[unshuffperm$corr_B,unshuffperm$corr_B]), "F")^2}else{
       # t2unshuff[j]<-t2[j]}
      }
  crit_ts2[iii]<-quantile(t2,0.95)
  #crit_ts2unshuff[iii]<-quantile(t2unshuff,0.95)
}

alt_ts2C <- matrix(0,6,200)

for(iii in 1:6){
  for(j in 1:200){
      # supress warnings of <0 and >1 entries in X1%*%t(X1)
     suppressWarnings({AA1 <- sample_dot_product(t(X1),directed=FALSE)[]})
      XX1<-ase(AA1,d)$X
      Phat1<-XX1%*%t(XX1)
      
      # supress warnings of <0 and >1 entries in Y1%*%t(Y1)
     suppressWarnings({BB1 <- sample_dot_product(t(Y1),directed=FALSE)[]})
      YY1<-ase(BB1,d)$X
      Phat3<-YY1%*%t(YY1)
      Phat3shuff<-Phat3[PP[[iii]], PP[[iii]]]
      
      alt_ts2C[iii,j]<-TS2(Phat1,Phat3shuff)
      
      #if(k[iii]!=0){
      #Mk<-c((k[iii]+1):nn)
      #unshuffperm <- gm(Phat1, Phat2shuff, seeds = Mk)
      #alt_ts2n_unshuff[iii,j]<-TS2(Phat1,Phat2shuff[unshuffperm$corr_B,unshuffperm$corr_B])
      #unshuffperm <- gm(Phat1, Phat3shuff, seeds = Mk)
      #alt_ts2a_unshuff[iii,j]<-TS2(Phat1,Phat3shuff[unshuffperm$corr_B,unshuffperm$corr_B])}else{
        #alt_ts2n_unshuff[iii,j]<-alt_ts2n[iii,j]
        #alt_ts2a_unshuff[iii,j]<-alt_ts2a[iii,j]}
  }
}

Pow2C <- matrix(NA,6,6 )
#Pow2a_unshuff <- matrix(NA,6,6 )
#Pow2n_unshuff <- matrix(NA,6,6 )
for(i in 1:6){
  for(j in 1:i){
    Pow2C[i,j] <- sum( alt_ts2C[j,] > crit_ts2[i])/200 
    #Pow2a_unshuff[i,j] <- sum( alt_ts2a_unshuff[j,] > crit_ts2unshuff[i])/200 
    #Pow2n_unshuff[i,j] <- sum( alt_ts2n_unshuff[j,] > crit_ts2unshuff[i])/200 
  }
}
PPP2C <-PPP2C+Pow2C
#PPP2a_un <-PPP2a_un+Pow2a_unshuff
#PPP2n_un <-PPP2n_un+Pow2n_unshuff

}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45
## [1] 46
## [1] 47
## [1] 48
## [1] 49
## [1] 50
PPP2B <- matrix(0,6,6)
#PPP2a_un <- matrix(0,6,6)
#PPP2n_un <- matrix(0,6,6)
for(eee in 1:50){
print(eee)
perm<-sample(nn)
X1<-X1[perm,]
X2<-X2[perm,]
Y1<-Y1[perm,]
k <- c(5, 10, 15, 20, 25, 50)
PP <- list()
for(ii in 1:6){
  if(k[ii] != 0){
  pp <- derange(k[ii])
  pp <- c(pp,(k[ii]+1):nn)
  PP[[ii]] <- pp}else{PP[[ii]]<-c(1:nn)}
}

crit_ts2 <- rep(0,6)

for(iii in 1:6){
  t2 <- rep(0,200)
  for(j in 1:200){
      # supress warnings of <0 and >1 entries in X1%*%t(X1)
     suppressWarnings({AA1 <- sample_dot_product(t(X1),directed=FALSE)[]})
      XX1<-ase(AA1,d)$X
      Phat1<-XX1%*%t(XX1)
      
      # supress warnings of <0 and >1 entries in X1%*%t(X1)
     suppressWarnings({AA2 <- sample_dot_product(t(X1),directed=FALSE)[]})
      XX2<-ase(AA2,d)$X
      Phat2<-XX2%*%t(XX2)
      Phat2shuff<-Phat2[PP[[iii]], PP[[iii]]]
      
      t2[j]<-TS2(Phat1,Phat2shuff)
      #Mk<-c((k[iii]+1):nn)
      #if(k[iii]!=0){
      #unshuffperm <- gm(Phat1, Phat2shuff, seeds = Mk)
      #t2unshuff[j]<-norm(as.matrix(Phat1 - Phat2shuff[unshuffperm$corr_B,unshuffperm$corr_B]), "F")^2}else{
       # t2unshuff[j]<-t2[j]}
      }
  crit_ts2[iii]<-quantile(t2,0.95)
  #crit_ts2unshuff[iii]<-quantile(t2unshuff,0.95)
}

alt_ts2B <- matrix(0,6,200)

for(iii in 1:6){
  for(j in 1:200){
      # supress warnings of <0 and >1 entries in X1%*%t(X1)
     suppressWarnings({AA1 <- sample_dot_product(t(X1),directed=FALSE)[]})
      XX1<-ase(AA1,d)$X
      Phat1<-XX1%*%t(XX1)
      
      # supress warnings of <0 and >1 entries in X2%*%t(X2)
     suppressWarnings({AA2 <- sample_dot_product(t(X2),directed=FALSE)[]})
      XX2<-ase(AA2,d)$X
      Phat2<-XX2%*%t(XX2)
      Phat2shuff<-Phat2[PP[[iii]], PP[[iii]]]
      
      alt_ts2B[iii,j]<-TS2(Phat1,Phat2shuff)
      
      #if(k[iii]!=0){
      #Mk<-c((k[iii]+1):nn)
      #unshuffperm <- gm(Phat1, Phat2shuff, seeds = Mk)
      #alt_ts2n_unshuff[iii,j]<-TS2(Phat1,Phat2shuff[unshuffperm$corr_B,unshuffperm$corr_B])
      #unshuffperm <- gm(Phat1, Phat3shuff, seeds = Mk)
      #alt_ts2a_unshuff[iii,j]<-TS2(Phat1,Phat3shuff[unshuffperm$corr_B,unshuffperm$corr_B])}else{
        #alt_ts2n_unshuff[iii,j]<-alt_ts2n[iii,j]
        #alt_ts2a_unshuff[iii,j]<-alt_ts2a[iii,j]}
  }
}


Pow2B <- matrix(NA,6,6 )
#Pow2a_unshuff <- matrix(NA,6,6 )
#Pow2n_unshuff <- matrix(NA,6,6 )
for(i in 1:6){
  for(j in 1:i){
    Pow2B[i,j] <- sum( alt_ts2B[j,] > crit_ts2[i])/200 
    #Pow2a_unshuff[i,j] <- sum( alt_ts2a_unshuff[j,] > crit_ts2unshuff[i])/200 
    #Pow2n_unshuff[i,j] <- sum( alt_ts2n_unshuff[j,] > crit_ts2unshuff[i])/200 
  }
}
PPP2B <-PPP2B+Pow2B
#PPP2a_un <-PPP2a_un+Pow2a_unshuff
#PPP2n_un <-PPP2n_un+Pow2n_unshuff

}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45
## [1] 46
## [1] 47
## [1] 48
## [1] 49
## [1] 50
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
PPP2B<-PPP2B/50
PPP2C<-PPP2C/50

##############################################################################
#------------------------ GRAPHING ------------------------------------------
###########################################################################

library(ggplot2)
library(reshape2)
library(viridis)
## Loading required package: viridisLite
library(colorspace)
library(extrafont)
## Registering fonts with R
colors <- (qualitative_hcl(6, "Dark 3"))
points <- c(19, 17, 15, 8, 15, 18)

pow_C <- na.omit(melt(PPP2C))$value
nshuffH1<-c(rep(25,6),rep(50,5), rep(100,4), rep(150,3), rep(200,2), rep(250,1))
X<-cbind(pow_C,nshuffH1)
nshuffH0<-c(25, 50, 100, 150, 200, 250, 50, 100, 150, 200, 250,100, 150, 200, 250,150, 200, 250,200, 250,250)
X<-cbind(X,nshuffH0)
X<-data.frame(X)
X$nshuffH0<-as.factor(X$nshuffH0)

gpa<-ggplot(X, aes(x=nshuffH1, y=pow_C, color=nshuffH0))+theme_classic()
gpa <- gpa + geom_line(aes(linetype=nshuffH0),size=1.05) +  
  geom_point(aes(shape=nshuffH0),size=4) +
  geom_errorbar(aes(ymin=pow_C-2*sqrt(pow_C*(1-pow_C)/50),ymax=pow_C+2*sqrt(pow_C*(1-pow_C)/50)),
                width=20)+
  labs(family="serif", title="Power Testing: Twitter vs. Friendfeed", x="Number of Vertices Shuffled in H1", y="Power", color='Vert. shuff\n in H0', shape='Vert. shuff\n in H0',linetype='Vert. shuff\n in H0')  +  
  theme(plot.title = element_text(family="serif", face = "bold", size = 17, hjust=0.5), 
        panel.background = element_rect(fill = "#FFFFFF"), 
        legend.title = element_text(family="serif", face="bold", color = "#000000", size = 12),
        legend.background = element_rect(fill = "#FFFFFF"), 
        text = element_text(size=20, family = "serif"),
        axis.title = element_text(family = "serif", size = 15, colour = "#000000"),
        plot.background = element_rect(fill = "#FFFFFF"),
        axis.line = element_line(colour = "#000000", size = 1), 
        legend.text = element_text( family="serif", color = "#000000", size=15)) + scale_colour_manual(values=colors) + scale_shape_manual(values=points)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
pow_B <- na.omit(melt(PPP2B))$value
nshuffH1<-c(rep(5,6),rep(10,5), rep(15,4), rep(20,3), rep(25,2), rep(50,1))
Y<-cbind(pow_B,nshuffH1)
nshuffH0<-c(5, 10, 15, 20, 25, 50, 10, 15, 20, 25, 50,15, 20, 25, 50, 20, 25, 50,25, 50,50)
Y<-cbind(Y,nshuffH0)
Y<-data.frame(Y)
Y$nshuffH0<-as.factor(Y$nshuffH0)

gpa2<-ggplot(Y, aes(x=nshuffH1, y=pow_B, color=nshuffH0)) + theme_classic() 
gpa2<- gpa2 + geom_line(aes(linetype=nshuffH0),size=1.05) +
  geom_errorbar(aes(ymin=pow_B-2*sqrt(pow_B*(1-pow_B)/50),ymax=pow_B+2*sqrt(pow_B*(1-pow_B)/50)),
                width=5)+ 
  geom_point(aes(shape=nshuffH0),size=4) +
  labs(family="serif", title="Power Testing: Youtube vs. Friendfeed", x="Number of Vertices Shuffled in H1", y="Power", color='Vert. shuff\n in H0', shape='Vert. shuff\n in H0',linetype='Vert. shuff\n in H0')+
  theme(plot.title = element_text(family="serif", face = "bold", size = 17, hjust=0.5), 
        panel.background = element_rect(fill = "#FFFFFF"), 
        legend.title = element_text(family="serif", face="bold", color = "#000000", size = 12),
        legend.background = element_rect(fill = "#FFFFFF"), 
        text = element_text(size=20, family = "serif"),
        axis.title = element_text(family = "serif", size = 15, colour = "#000000"),
        plot.background = element_rect(fill = "#FFFFFF"),
        axis.line = element_line(colour = "#000000", size = 1), 
        legend.text = element_text( family="serif", color = "#000000", size=15)) + scale_colour_manual(values=colors) + scale_shape_manual(values=points)

library(patchwork)
## 
## Attaching package: 'patchwork'
## The following object is masked from 'package:MASS':
## 
##     area
(gpa+theme(legend.position = "bottom")) + (gpa2 +theme(legend.position = "bottom"))