# clear environment rm(list = ls()); gc() ## load libs library(data.table) library(magrittr) library(markovchain) ## dummy data number_nodes <- 10 node_names <- letters[seq_len(number_nodes)] set.seed(1) nodes <- sapply( seq_len(number_nodes), function(x) { paste( c( node_names[-x] , sample(node_names[-x], sample(1:5, 1), replace = T) ) , collapse = ' ' ) } ) names(nodes) <- node_names print( paste(nodes, collapse = '; ') ) ## make long dt dt <- data.table( citing_node = node_names , cited_node = nodes ) %>% .[, .(cited_node = unlist(strsplit(cited_node, ' '))), by = citing_node] %>% dcast( . , cited_node ~ citing_node , fun.aggregate = length ) dt apply(dt[,-1,with=F], 2, sum) ## affinity matrix A <- as.matrix(dt[, 2:dim(dt)[2]]) A <- sweep(A, 2, colSums(A), `/`) A[is.nan(A)] <- 0 rowSums(A) colSums(A) mark <- new("markovchain", transitionMatrix = t(A), states = node_names, name = "mark") plot(mark)
By the way, I recommend using the data.table package for experiments. In conjunction with the principles of tidyverse, everything turns out efficiently and quickly.
> apply (dt [, - 1, with = F], 2, sum)
abcdefghij
11 14 10 10 11 13 11 11 11 12
> colSums (A)
abcdefghij
1 1 1 1 1 1 1 1 1 1
## calculate pagerank ei <- eigen(A) as.numeric(ei$values[1]) pr <- as.numeric(ei$vectors[,1] / sum(ei$vectors[,1])) sum(pr) names(pr) <- node_names print(round(pr, 2))
> print (round (pr, 2))
abcdefghij
0.09 0.11 0.09 0.10 0.10 0.11 0.10 0.11 0.08 0.11
dim.1 <- dim(A)[1] A <- as.data.table(A) nul_cols <- apply(A, 2, function(x) sum(x) == 0) if( sum(nul_cols) > 0 ) { A[ , (colnames(A)[nul_cols]) := lapply(.SD, function(x) 1 / dim.1) , .SDcols = colnames(A)[nul_cols] ] } A <- as.matrix(A)
d = 0.15 #damping factor (to ensure algorithm convergence) M <- (1 - d) * A + d * (1 / dim.1 * matrix(1, nrow = dim.1, ncol = dim.1)) # google matrix
## pagerank function (tested on example from tutorial) rm(pagerank_func) pagerank_func <- function( A #transition matrix , eps = 0.00001 #sufficiently small error , d = 0.15 #damping factor (to ensure algorithm convergence) ) { dim.1 <- dim(A)[1] A <- as.data.table(A) nul_cols <- apply(A, 2, function(x) sum(x) == 0) if( sum(nul_cols) > 0 ) { A[ , (colnames(A)[nul_cols]) := lapply(.SD, function(x) 1 / dim.1) , .SDcols = colnames(A)[nul_cols] ] } A <- as.matrix(A) M <- (1 - d) * A + d * (1 / dim.1 * matrix(1, nrow = dim.1, ncol = dim.1)) # google matrix rank = as.numeric(rep(1 / dim.1, dim.1)) ## iterate until convergence while( sum( (rank - M %*% rank) ^ 2 ) > eps ) { rank <- M %*% rank } return(rank) } pr2 <- pagerank_func(A) pr2=as.vector(pr2) names(pr2)=node_names
> print (round (pr, 2))
abcdefghij
0.09 0.11 0.09 0.10 0.10 0.11 0.10 0.11 0.08 0.11
> print (round (pr2, 2))
abcdefghij
0.09 0.11 0.09 0.10 0.10 0.11 0.10 0.11 0.08 0.11
# clear environment rm(list = ls()); gc() ## load libs library(data.table) library(magrittr) library(markovchain) ## dummy data number_nodes <- 10 node_names <- letters[seq_len(number_nodes)] set.seed(1) nodes <- sapply( seq_len(number_nodes), function(x) { paste( c( node_names[-x] , sample(node_names[-x], sample(1:5, 1), replace = T) ) , collapse = ' ' ) } ) names(nodes) <- node_names print( paste(nodes, collapse = '; ') ) ## make long dt dt <- data.table( citing_node = node_names , cited_node = nodes ) %>% .[, .(cited_node = unlist(strsplit(cited_node, ' '))), by = citing_node] %>% dcast( . , cited_node ~ citing_node , fun.aggregate = length ) dt apply(dt[,-1,with=F], 2, sum) ## affinity matrix A <- as.matrix(dt[, 2:dim(dt)[2]]) A <- sweep(A, 2, colSums(A), `/`) A[is.nan(A)] <- 0 rowSums(A) colSums(A) mark <- new("markovchain", transitionMatrix = t(A), states = node_names, name = "mark") plot(mark) ## calculate pagerank ei <- eigen(A) as.numeric(ei$values[1]) pr <- as.numeric(ei$vectors[,1] / sum(ei$vectors[,1])) sum(pr) names(pr) <- node_names print(round(pr, 2)) ## pagerank function (tested on example from tutorial) rm(pagerank_func) pagerank_func <- function( A #transition matrix , eps = 0.00001 #sufficiently small error , d = 0.15 #damping factor (to ensure algorithm convergence) ) { dim.1 <- dim(A)[1] A <- as.data.table(A) nul_cols <- apply(A, 2, function(x) sum(x) == 0) if( sum(nul_cols) > 0 ) { A[ , (colnames(A)[nul_cols]) := lapply(.SD, function(x) 1 / dim.1) , .SDcols = colnames(A)[nul_cols] ] } A <- as.matrix(A) M <- (1 - d) * A + d * (1 / dim.1 * matrix(1, nrow = dim.1, ncol = dim.1)) # google matrix rank = as.numeric(rep(1 / dim.1, dim.1)) ## iterate until convergence while( sum( (rank - M %*% rank) ^ 2 ) > eps ) { rank <- M %*% rank } return(rank) } pr2 <- pagerank_func(A) pr2=as.vector(pr2) names(pr2)=node_names print(round(pr, 2)) print(round(pr2, 2))
Source: https://habr.com/ru/post/439332/