No. 123: 25 Days of Network Theory – Day 6 – Relative Importance of Ex-Soviet Countries in the Petroleum Trade

<br />

I had originally intended to create graphics for all the world’s countries, but the resulting visualizations looked so cluttered that I felt like I was tripping on acid, so I reduced the scope of today’s post to those nations that used to belong to the Soviet Union.

Departing from my original plan for this post, I switched the petroleum data to an MIT dataset that goes all the way back to 1962, although in retrospect that was unnecessary. A friend of mine suggested that I create some kind of visualization that varies over time, so I’ve done just that. I used igraph to create a network for each year, calculated the eigenvector centrality of each node in each network, and then calculated the importance of the ex-Soviet countries relative to each other in the international sphere.

You can see from these visualizations that immediately after the breakup, Russia was the dominant player, but as the years have gone by, other countries like Azerbaijan and Kazakhstan have become increasingly important, and for this particular commodity, Russia’s power is declining:

I felt like these templates weren’t designed to handle all the ex-Soviet countries, so for the top visualization I hand-picked the 4 countries that I believed had the most influence. Here are all of them together:

<br />

library(sqldf)
library(rgexf)
library(igraph)
library(reshape2)
library(plyr)

# Source URLs for the raw data files (bzip2-compressed, tab-separated).
trade_url <- "http://atlas.media.mit.edu/static/db/raw/year_origin_destination_sitc_rev2.tsv.bz2"
countries_url <- "http://atlas.media.mit.edu/static/db/raw/country_names.tsv.bz2"

# Local file names are the last path component of each URL.
trade_filename <- basename(trade_url)
countries_filename <- basename(countries_url)

# Download both archives into the working directory. Binary mode is requested
# explicitly so the compressed bytes are written unchanged on every platform.
download.file(trade_url, destfile = trade_filename, mode = "wb")
download.file(countries_url, destfile = countries_filename, mode = "wb")

# Import the data into R.
# * quote = "\"" and comment.char = "" stop apostrophes and "#" inside a field
#   from being interpreted as quote / comment markers in tab-separated text.
# * stringsAsFactors = FALSE keeps codes and values as plain vectors, so the
#   numeric conversion below can never return factor level codes.
# * "NULL" is additionally treated as missing in the trade table.
#   NOTE(review): presumably the raw file marks missing values as NULL - confirm.
trade <- read.table(
  file = trade_filename, sep = "\t", header = TRUE,
  quote = "\"", comment.char = "", na.strings = c("NA", "NULL"),
  stringsAsFactors = FALSE
)
country_names <- read.table(
  file = countries_filename, sep = "\t", header = TRUE,
  quote = "\"", comment.char = "", stringsAsFactors = FALSE
)

# Extract petroleum trade activity (SITC code 3330). which() drops rows whose
# code is missing instead of turning them into all-NA rows.
petro_data <- trade[which(trade$sitc == 3330), ]

# Convert the export value to numeric BEFORE filtering. Going through
# as.character() makes the conversion correct even if the column arrives as a
# factor, where as.numeric() alone would return the level codes.
petro_data$export_val <- as.numeric(as.character(petro_data$export_val))

# We want just the exports to avoid double counting: keep rows with a known,
# strictly positive export value. The comparison is numeric, so zero is
# excluded however it was formatted in the file, and log() below can never
# produce -Inf or NA edge weights.
has_export <- !is.na(petro_data$export_val) & petro_data$export_val > 0
petr_exp <- petro_data[has_export, ]

# Codes xxa..xxh do not seem to be countries; remove every row that has one of
# them as origin or destination.
non_country_codes <- c("xxa", "xxb", "xxc", "xxd", "xxe", "xxf", "xxg", "xxh")
is_non_country <- petr_exp$origin %in% non_country_codes |
  petr_exp$dest %in% non_country_codes
petr_exp <- petr_exp[!is_non_country, ]

# Country codes are used as node ids later on, so make sure they are character.
petr_exp$origin <- as.character(petr_exp$origin)
petr_exp$dest <- as.character(petr_exp$dest)

# Take the log of the export value to use as edge weight.
petr_exp$export_log <- log(petr_exp$export_val)


# Generate a data frame with eigenvector centrality for each year.
# A separate network is generated for each year: the year's petroleum exports
# are written to a GEXF file, read back with rgexf, converted to an igraph
# object, and the eigenvector centrality of every node is computed. Only the
# ex-Soviet countries are kept, and each one's centrality is expressed as a
# percentage of the ex-Soviet total for that year. The result, petro_eigendata,
# has one row per year and one column per country (plus `date`).

# Countries kept in the output; they are matched against the node labels,
# which come from country_names$name.
# Alternative selection used earlier:
#   c("United States","Netherlands","United Kingdom","China","Russia")
ex_soviet <- c(
  "Russia", "Ukraine", "Armenia", "Azerbaijan", "Belarus", "Estonia",
  "Georgia", "Kazakhstan", "Kyrgyzstan", "Latvia", "Lithuania", "Moldova",
  "Tajikistan", "Turkmenistan", "Uzbekistan"
)

# Escape the characters that may not appear verbatim inside an XML attribute
# value. "&" is handled first so the entities inserted by the later
# replacements are not escaped a second time.
escape_xml_attr <- function(x) {
  x <- gsub("&", "&amp;", x, fixed = TRUE)
  x <- gsub("<", "&lt;", x, fixed = TRUE)
  x <- gsub(">", "&gt;", x, fixed = TRUE)
  gsub("\"", "&quot;", x, fixed = TRUE)
}

# GEXF metadata; identical for every year, so it is built once.
# NOTE(review): the graph is declared "undirected" here while
# eigen_centrality() below is called with directed = TRUE - confirm which one
# is intended.
gexf_header <- '<?xml version="1.0" encoding="UTF-8"?>
<gexf xmlns:viz="http:///www.gexf.net/1.1draft/viz" version="1.1" xmlns="http://www.gexf.net/1.1draft">
<meta lastmodifieddate="2010-03-03+23:44">
<creator>Gephi 0.7</creator>
</meta>
<graph defaultedgetype="undirected" idtype="string" type="static">'

years <- 1992:2014
# years <- 2000:2014

# One slot per year; filled inside the loop and combined once afterwards
# instead of growing a data frame on every iteration.
year_results <- vector("list", length(years))

for (k in seq_along(years)) {
  j <- years[k]
  petr_exp_curryear <- petr_exp[petr_exp$year == j, ]

  # A year without any trade rows cannot form a network; skip it rather than
  # failing on an empty data frame.
  if (nrow(petr_exp_curryear) == 0) {
    next
  }

  # Build edges. Each edge runs from the importer (dest) to the exporter
  # (origin) and carries the log export value as its weight.
  petr_exp_curryear$edgenum <- seq_len(nrow(petr_exp_curryear))
  petr_exp_curryear$edges <- paste0(
    '<edge id="', petr_exp_curryear$edgenum,
    '" source="', petr_exp_curryear$dest,
    '" target="', petr_exp_curryear$origin,
    '" weight="', petr_exp_curryear$export_log, '"/>'
  )

  # Build nodes: one per country code seen this year, labelled with the
  # country name. match() takes the first name for each code, so a duplicated
  # code in country_names cannot produce duplicated nodes; codes without a
  # name get the label "NA".
  node_ids <- sort(unique(c(petr_exp_curryear$origin, petr_exp_curryear$dest)))
  nodes <- data.frame(id = node_ids, stringsAsFactors = FALSE)
  name_idx <- match(nodes$id, as.character(country_names$id_3char))
  nodes$name <- as.character(country_names$name)[name_idx]
  nodes$nodestr <- paste0(
    '<node id="', nodes$id,
    '" label="', escape_xml_attr(nodes$name), '"/>'
  )

  # Assemble the document as a vector of lines and write it in one call.
  # Passing the path to writeLines() lets it open and close the file itself,
  # so no connection is left open if a later step fails.
  gexf_lines <- c(
    gexf_header,
    paste0('<nodes count="', nrow(nodes), '">'),
    nodes$nodestr,
    "</nodes>",
    paste0('<edges count="', nrow(petr_exp_curryear), '">'),
    petr_exp_curryear$edges,
    "</edges>",
    "</graph>",
    "</gexf>"
  )
  writeLines(gexf_lines, "exp_curryear.gexf")

  # Import gexf file and convert to igraph object.
  petr_exp_curryear_gexf <- read.gexf("exp_curryear.gexf")
  petr_exp_curryear_igraph <- gexf.to.igraph(petr_exp_curryear_gexf)

  # Eigenvector centrality of every node, weighted by the edge weights. The
  # argument is spelled out as `weights` instead of relying on partial
  # matching of `weight`.
  curryear_eigen_centrality <- eigen_centrality(
    petr_exp_curryear_igraph,
    directed = TRUE,
    weights = edge_attr(petr_exp_curryear_igraph)$weight
  )$vector

  # The centrality vector's names become row names; move them to a column.
  curryear_eigendata <- data.frame(date = j, curryear_eigen_centrality)
  curryear_eigendata$country <- rownames(curryear_eigendata)
  rownames(curryear_eigendata) <- NULL

  # Keep only the ex-Soviet countries.
  # Alternative (top 4 by centrality):
  #   curryear_eigendata <- curryear_eigendata[order(-curryear_eigen_centrality),]
  #   curryear_eigendata <- curryear_eigendata[c(1:4),]
  is_ex_soviet <- curryear_eigendata$country %in% ex_soviet
  curryear_eigendata <- curryear_eigendata[is_ex_soviet, ]

  # Nothing to report for a year in which none of the countries appears.
  if (nrow(curryear_eigendata) == 0) {
    next
  }

  # Share of each country in the ex-Soviet total, in percent.
  centrality_total <- sum(curryear_eigendata$curryear_eigen_centrality)
  curryear_eigendata$eigen_pct <-
    (curryear_eigendata$curryear_eigen_centrality / centrality_total) * 100

  # One row for the year, one column per country.
  year_results[[k]] <- dcast(
    curryear_eigendata, date ~ country,
    value.var = "eigen_pct"
  )
}

# Combine the yearly rows; countries absent in a year are filled with NA.
year_results <- Filter(Negate(is.null), year_results)
if (length(year_results) == 0) {
  stop("No eigenvector centrality data was produced for any year.",
       call. = FALSE)
}
petro_eigendata <- rbind.fill(year_results)

# Cells that are NA (no value for that country in that year) count as 0.
petro_eigendata[is.na(petro_eigendata)] <- 0

# Export for stack diagram: the wide table, tab-separated.
write.table(
  petro_eigendata,
  file = "petro_eigendata.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# Export for show reel: long format with columns date / symbol / price,
# restricted to four countries whose names are replaced by short codes.
showreel_codes <- c(
  Russia = "RUS",
  Kazakhstan = "KAZ",
  Ukraine = "UKR",
  Azerbaijan = "AZE"
)

petro_long <- setNames(
  melt(petro_eigendata, id.vars = "date"),
  c("date", "symbol", "price")
)
keep_rows <- petro_long$symbol %in% names(showreel_codes)
petro_long <- petro_long[keep_rows, ]

# Look the code up by country name; as.character() so the lookup is by name
# rather than by position.
petro_long$symbol <- unname(showreel_codes[as.character(petro_long$symbol)])

write.csv(
  petro_long,
  file = "petro_long.csv",
  quote = FALSE,
  row.names = FALSE
)

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

library(sqldf)

library(rgexf)

library(igraph)

library(reshape2)

library(plyr)

# Source URLs for the raw data files (bzip2-compressed, tab-separated).
trade_url <- "http://atlas.media.mit.edu/static/db/raw/year_origin_destination_sitc_rev2.tsv.bz2"
countries_url <- "http://atlas.media.mit.edu/static/db/raw/country_names.tsv.bz2"

# Local file names are the last path component of each URL.
trade_filename <- basename(trade_url)
countries_filename <- basename(countries_url)

# Download both archives into the working directory. Binary mode is requested
# explicitly so the compressed bytes are written unchanged on every platform.
download.file(trade_url, destfile = trade_filename, mode = "wb")
download.file(countries_url, destfile = countries_filename, mode = "wb")

# Import the data into R.
# * quote = "\"" and comment.char = "" stop apostrophes and "#" inside a field
#   from being interpreted as quote / comment markers in tab-separated text.
# * stringsAsFactors = FALSE keeps codes and values as plain vectors, so the
#   numeric conversion below can never return factor level codes.
# * "NULL" is additionally treated as missing in the trade table.
#   NOTE(review): presumably the raw file marks missing values as NULL - confirm.
trade <- read.table(
  file = trade_filename, sep = "\t", header = TRUE,
  quote = "\"", comment.char = "", na.strings = c("NA", "NULL"),
  stringsAsFactors = FALSE
)
country_names <- read.table(
  file = countries_filename, sep = "\t", header = TRUE,
  quote = "\"", comment.char = "", stringsAsFactors = FALSE
)

# Extract petroleum trade activity (SITC code 3330). which() drops rows whose
# code is missing instead of turning them into all-NA rows.
petro_data <- trade[which(trade$sitc == 3330), ]

# Convert the export value to numeric BEFORE filtering. Going through
# as.character() makes the conversion correct even if the column arrives as a
# factor, where as.numeric() alone would return the level codes.
petro_data$export_val <- as.numeric(as.character(petro_data$export_val))

# We want just the exports to avoid double counting: keep rows with a known,
# strictly positive export value. The comparison is numeric, so zero is
# excluded however it was formatted in the file, and log() below can never
# produce -Inf or NA edge weights.
has_export <- !is.na(petro_data$export_val) & petro_data$export_val > 0
petr_exp <- petro_data[has_export, ]

# Codes xxa..xxh do not seem to be countries; remove every row that has one of
# them as origin or destination.
non_country_codes <- c("xxa", "xxb", "xxc", "xxd", "xxe", "xxf", "xxg", "xxh")
is_non_country <- petr_exp$origin %in% non_country_codes |
  petr_exp$dest %in% non_country_codes
petr_exp <- petr_exp[!is_non_country, ]

# Country codes are used as node ids later on, so make sure they are character.
petr_exp$origin <- as.character(petr_exp$origin)
petr_exp$dest <- as.character(petr_exp$dest)

# Take the log of the export value to use as edge weight.
petr_exp$export_log <- log(petr_exp$export_val)

# Generate a data frame with eigenvector centrality for each year.
# A separate network is generated for each year: the year's petroleum exports
# are written to a GEXF file, read back with rgexf, converted to an igraph
# object, and the eigenvector centrality of every node is computed. Only the
# ex-Soviet countries are kept, and each one's centrality is expressed as a
# percentage of the ex-Soviet total for that year. The result, petro_eigendata,
# has one row per year and one column per country (plus `date`).

# Countries kept in the output; they are matched against the node labels,
# which come from country_names$name.
# Alternative selection used earlier:
#   c("United States","Netherlands","United Kingdom","China","Russia")
ex_soviet <- c(
  "Russia", "Ukraine", "Armenia", "Azerbaijan", "Belarus", "Estonia",
  "Georgia", "Kazakhstan", "Kyrgyzstan", "Latvia", "Lithuania", "Moldova",
  "Tajikistan", "Turkmenistan", "Uzbekistan"
)

# Escape the characters that may not appear verbatim inside an XML attribute
# value. "&" is handled first so the entities inserted by the later
# replacements are not escaped a second time.
escape_xml_attr <- function(x) {
  x <- gsub("&", "&amp;", x, fixed = TRUE)
  x <- gsub("<", "&lt;", x, fixed = TRUE)
  x <- gsub(">", "&gt;", x, fixed = TRUE)
  gsub("\"", "&quot;", x, fixed = TRUE)
}

# GEXF metadata; identical for every year, so it is built once. The <gexf> and
# <meta> opening tags are required: the document closes both elements, so
# without them the file is not well-formed XML.
# NOTE(review): the graph is declared "undirected" here while
# eigen_centrality() below is called with directed = TRUE - confirm which one
# is intended.
gexf_header <- '<?xml version="1.0" encoding="UTF-8"?>
<gexf xmlns:viz="http:///www.gexf.net/1.1draft/viz" version="1.1" xmlns="http://www.gexf.net/1.1draft">
<meta lastmodifieddate="2010-03-03+23:44">
<creator>Gephi 0.7</creator>
</meta>
<graph defaultedgetype="undirected" idtype="string" type="static">'

years <- 1992:2014
# years <- 2000:2014

# One slot per year; filled inside the loop and combined once afterwards
# instead of growing a data frame on every iteration.
year_results <- vector("list", length(years))

for (k in seq_along(years)) {
  j <- years[k]
  petr_exp_curryear <- petr_exp[petr_exp$year == j, ]

  # A year without any trade rows cannot form a network; skip it rather than
  # failing on an empty data frame.
  if (nrow(petr_exp_curryear) == 0) {
    next
  }

  # Build edges. Each edge runs from the importer (dest) to the exporter
  # (origin) and carries the log export value as its weight.
  petr_exp_curryear$edgenum <- seq_len(nrow(petr_exp_curryear))
  petr_exp_curryear$edges <- paste0(
    '<edge id="', petr_exp_curryear$edgenum,
    '" source="', petr_exp_curryear$dest,
    '" target="', petr_exp_curryear$origin,
    '" weight="', petr_exp_curryear$export_log, '"/>'
  )

  # Build nodes: one per country code seen this year, labelled with the
  # country name. match() takes the first name for each code, so a duplicated
  # code in country_names cannot produce duplicated nodes; codes without a
  # name get the label "NA".
  node_ids <- sort(unique(c(petr_exp_curryear$origin, petr_exp_curryear$dest)))
  nodes <- data.frame(id = node_ids, stringsAsFactors = FALSE)
  name_idx <- match(nodes$id, as.character(country_names$id_3char))
  nodes$name <- as.character(country_names$name)[name_idx]
  nodes$nodestr <- paste0(
    '<node id="', nodes$id,
    '" label="', escape_xml_attr(nodes$name), '"/>'
  )

  # Assemble the document as a vector of lines and write it in one call.
  # Passing the path to writeLines() lets it open and close the file itself,
  # so no connection is left open if a later step fails.
  gexf_lines <- c(
    gexf_header,
    paste0('<nodes count="', nrow(nodes), '">'),
    nodes$nodestr,
    "</nodes>",
    paste0('<edges count="', nrow(petr_exp_curryear), '">'),
    petr_exp_curryear$edges,
    "</edges>",
    "</graph>",
    "</gexf>"
  )
  writeLines(gexf_lines, "exp_curryear.gexf")

  # Import gexf file and convert to igraph object.
  petr_exp_curryear_gexf <- read.gexf("exp_curryear.gexf")
  petr_exp_curryear_igraph <- gexf.to.igraph(petr_exp_curryear_gexf)

  # Eigenvector centrality of every node, weighted by the edge weights. The
  # argument is spelled out as `weights` instead of relying on partial
  # matching of `weight`.
  curryear_eigen_centrality <- eigen_centrality(
    petr_exp_curryear_igraph,
    directed = TRUE,
    weights = edge_attr(petr_exp_curryear_igraph)$weight
  )$vector

  # The centrality vector's names become row names; move them to a column.
  curryear_eigendata <- data.frame(date = j, curryear_eigen_centrality)
  curryear_eigendata$country <- rownames(curryear_eigendata)
  rownames(curryear_eigendata) <- NULL

  # Keep only the ex-Soviet countries.
  # Alternative (top 4 by centrality):
  #   curryear_eigendata <- curryear_eigendata[order(-curryear_eigen_centrality),]
  #   curryear_eigendata <- curryear_eigendata[c(1:4),]
  is_ex_soviet <- curryear_eigendata$country %in% ex_soviet
  curryear_eigendata <- curryear_eigendata[is_ex_soviet, ]

  # Nothing to report for a year in which none of the countries appears.
  if (nrow(curryear_eigendata) == 0) {
    next
  }

  # Share of each country in the ex-Soviet total, in percent.
  centrality_total <- sum(curryear_eigendata$curryear_eigen_centrality)
  curryear_eigendata$eigen_pct <-
    (curryear_eigendata$curryear_eigen_centrality / centrality_total) * 100

  # One row for the year, one column per country.
  year_results[[k]] <- dcast(
    curryear_eigendata, date ~ country,
    value.var = "eigen_pct"
  )
}

# Combine the yearly rows; countries absent in a year are filled with NA.
year_results <- Filter(Negate(is.null), year_results)
if (length(year_results) == 0) {
  stop("No eigenvector centrality data was produced for any year.",
       call. = FALSE)
}
petro_eigendata <- rbind.fill(year_results)

# Cells that are NA (no value for that country in that year) count as 0.
petro_eigendata[is.na(petro_eigendata)] <- 0

# Export for stack diagram: the wide table, tab-separated.
write.table(
  petro_eigendata,
  file = "petro_eigendata.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# Export for show reel: long format with columns date / symbol / price,
# restricted to four countries whose names are replaced by short codes.
showreel_codes <- c(
  Russia = "RUS",
  Kazakhstan = "KAZ",
  Ukraine = "UKR",
  Azerbaijan = "AZE"
)

petro_long <- setNames(
  melt(petro_eigendata, id.vars = "date"),
  c("date", "symbol", "price")
)
keep_rows <- petro_long$symbol %in% names(showreel_codes)
petro_long <- petro_long[keep_rows, ]

# Look the code up by country name; as.character() so the lookup is by name
# rather than by position.
petro_long$symbol <- unname(showreel_codes[as.character(petro_long$symbol)])

write.csv(
  petro_long,
  file = "petro_long.csv",
  quote = FALSE,
  row.names = FALSE
)

One Thought on “No. 123: 25 Days of Network Theory – Day 6 – Relative Importance of Ex-Soviet Countries in the Petroleum Trade”

meep on 11 July, 2017 at 4:35 PM said:

The graphs could use a little more in the way of labeling (specifically, the time axis — as we care only about relative importance, the vertical axis doesn’t matter as much)

Reply↓

No. 123: 25 Days of Network Theory – Day 6 – Relative Importance of Ex-Soviet Countries in the Petroleum Trade

One Thought on “No. 123: 25 Days of Network Theory – Day 6 – Relative Importance of Ex-Soviet Countries in the Petroleum Trade”

Leave a Reply Cancel reply

Post Navigation

Archives

Categories

Links

Texas Cycling