I had originally intended to create graphics for all the world’s countries, but the resulting visualizations looked so cluttered that I felt like I was tripping on acid, so I reduced the scope of today’s post to those nations that used to belong to the Soviet Union.
Departing from my original plan for the post, I switched the petroleum data to an MIT dataset that goes all the way back to 1962, although in retrospect that was unnecessary. A friend of mine suggested that I create some kind of visualization that varied over time, so I’ve done just that. I used igraph to create a network for each year, calculated the eigenvector centrality of each node in each network, and then calculated the importance of the ex-Soviet countries relative to each other in the international sphere.
You can see from these visualizations that immediately after the breakup, Russia was the dominant player, but as the years have gone by, other countries like Azerbaijan and Kazakhstan have become increasingly important, and for this particular commodity, Russia’s power is declining:
I felt like these templates weren’t designed to handle all the ex-Soviet countries, so for the top visualization I hand-picked the four countries that I believed had the most influence. Here are all of them together:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# Relative eigenvector centrality of the ex-Soviet countries in the world
# petroleum trade network, one network per year.
#
# Outputs:
#   petro_eigendata.tsv - wide table (one row per year, one column per
#                         country) for the stack diagram
#   petro_long.csv      - long table (date, symbol, price) restricted to
#                         four hand-picked countries for the show reel
#   exp_curryear.gexf   - scratch file, overwritten for every year

library(sqldf)
library(rgexf)
library(igraph)
library(reshape2)
library(plyr)

# Source URLs for the data files ----
trade_url <- "http://atlas.media.mit.edu/static/db/raw/year_origin_destination_sitc_rev2.tsv.bz2"
countries_url <- "http://atlas.media.mit.edu/static/db/raw/country_names.tsv.bz2"

# Extract file names from the URLs
trade_filename <- basename(trade_url)
countries_filename <- basename(countries_url)

# Download data. The archives are binary (bzip2), so force binary mode;
# without it the files are corrupted on Windows.
download.file(trade_url, destfile = trade_filename, mode = "wb")
download.file(countries_url, destfile = countries_filename, mode = "wb")

# Import data into R ----
trade <- read.table(file = trade_filename, sep = "\t", header = TRUE)
# NOTE: `country_names` is referenced by name inside the sqldf query below.
country_names <- read.table(file = countries_filename, sep = "\t", header = TRUE)

# Extract petroleum trade activity (SITC code 3330)
petro_data <- trade[trade$sitc == 3330, ]

# Convert the export value to numeric BEFORE filtering on it. Going through
# as.character() matters: if read.table() produced a factor, a bare
# as.numeric() would return the factor level codes instead of the values.
petro_data$export_val <- as.numeric(as.character(petro_data$export_val))

# We want just the exports to avoid double counting. Rows with a missing or
# zero export value are dropped (a zero would also give log(0) = -Inf below).
petr_exp <- petro_data[
  !is.na(petro_data$export_val) & petro_data$export_val > 0,
]

# The "xx?" codes don't seem to be countries, remove them
non_countries <- c("xxa", "xxb", "xxc", "xxd", "xxe", "xxf", "xxg", "xxh")
petr_exp <- petr_exp[
  !(petr_exp$origin %in% non_countries) &
    !(petr_exp$dest %in% non_countries),
]

petr_exp$origin <- as.character(petr_exp$origin)
petr_exp$dest <- as.character(petr_exp$dest)

# Take the log of the export value to use as edge weight
petr_exp$export_log <- log(petr_exp$export_val)

# Countries whose relative importance is reported
ex_soviet <- c(
  "Russia", "Ukraine", "Armenia", "Azerbaijan", "Belarus", "Estonia",
  "Georgia", "Kazakhstan", "Kyrgyzstan", "Latvia", "Lithuania", "Moldova",
  "Tajikistan", "Turkmenistan", "Uzbekistan"
)

# GEXF preamble; identical for every year, so built once outside the loop
gexf_header <- paste(
  '<?xml version="1.0" encoding="UTF-8"?>',
  '<gexf xmlns:viz="http:///www.gexf.net/1.1draft/viz" version="1.1" xmlns="http://www.gexf.net/1.1draft">',
  '<meta lastmodifieddate="2010-03-03+23:44">',
  "<creator>Gephi 0.7</creator>",
  "</meta>",
  '<graph defaultedgetype="undirected" idtype="string" type="static">'
)
gexf_path <- "exp_curryear.gexf"

# Generate a data frame with eigenvector centrality for each year ----
# There is a separate network generated for each year. Results are collected
# in a preallocated list and bound once at the end.
years <- 1992:2014
yearly_pct <- vector("list", length(years))

for (k in seq_along(years)) {
  j <- years[k]
  petr_exp_curryear <- petr_exp[petr_exp$year == j, ]

  # A year without any trade rows cannot form a network; skip it
  if (nrow(petr_exp_curryear) == 0) {
    next
  }

  # Build edges. The edge runs from the importer (source) to the exporter
  # (target), as in the original analysis.
  petr_exp_curryear$edgenum <- seq_len(nrow(petr_exp_curryear))
  petr_exp_curryear$edges <- paste0(
    '<edge id="', petr_exp_curryear$edgenum,
    '" source="', petr_exp_curryear$dest,
    '" target="', petr_exp_curryear$origin,
    '" weight="', petr_exp_curryear$export_log, '"/>'
  )

  # Build nodes: every country code seen this year, labelled with its name
  nodes <- data.frame(
    id = sort(unique(c(petr_exp_curryear$origin, petr_exp_curryear$dest)))
  )
  nodes <- sqldf(
    "SELECT n.id, c.name FROM nodes n LEFT JOIN country_names c ON n.id = c.id_3char"
  )
  nodes$nodestr <- paste0(
    '<node id="', as.character(nodes$id), '" label="', nodes$name, '"/>'
  )

  # Assemble the whole document in one vectorised step instead of appending
  # one row at a time (which re-copies the growing string on every append)
  gexfstr <- paste0(
    gexf_header, "\n",
    '<nodes count="', nrow(nodes), '">\n',
    paste0(nodes$nodestr, "\n", collapse = ""),
    "</nodes>\n",
    '<edges count="', nrow(petr_exp_curryear), '">\n',
    paste0(petr_exp_curryear$edges, "\n", collapse = ""),
    "</edges>\n</graph>\n</gexf>"
  )
  writeLines(gexfstr, gexf_path)

  # Import the gexf file and convert it to an igraph object
  petr_exp_curryear_gexf <- read.gexf(gexf_path)
  petr_exp_curryear_igraph <- gexf.to.igraph(petr_exp_curryear_gexf)

  # `weights` is spelled out in full; the original relied on partial
  # matching of `weight`.
  curryear_eigen_centrality <- eigen_centrality(
    petr_exp_curryear_igraph,
    directed = TRUE,
    weights = edge_attr(petr_exp_curryear_igraph)$weight
  )$vector

  curryear_eigendata <- data.frame(date = j, curryear_eigen_centrality)
  curryear_eigendata$country <- rownames(curryear_eigendata)
  rownames(curryear_eigendata) <- NULL

  curryear_eigendata <- curryear_eigendata[
    curryear_eigendata$country %in% ex_soviet,
  ]
  # Nothing to report if none of the countries of interest traded this year
  if (nrow(curryear_eigendata) == 0) {
    next
  }

  # Share of each country in the group's total centrality, in percent
  curryear_eigendata$eigen_pct <-
    curryear_eigendata$curryear_eigen_centrality /
    sum(curryear_eigendata$curryear_eigen_centrality) * 100

  yearly_pct[[k]] <- dcast(
    curryear_eigendata, date ~ country,
    value.var = "eigen_pct"
  )
}

yearly_pct <- Filter(Negate(is.null), yearly_pct)
if (length(yearly_pct) == 0) {
  stop("No petroleum trade data found for the requested years", call. = FALSE)
}

# rbind.fill() pads countries missing in a given year with NA; those mean
# "no measurable importance" and are reported as 0.
petro_eigendata <- rbind.fill(yearly_pct)
petro_eigendata[is.na(petro_eigendata)] <- 0

# Export for stack diagram ----
write.table(
  petro_eigendata,
  file = "petro_eigendata.tsv",
  quote = FALSE, sep = "\t", row.names = FALSE
)

# Export for show reel ----
# The show-reel template expects the columns date / symbol / price.
petro_long <- melt(petro_eigendata, id.vars = "date")
names(petro_long) <- c("date", "symbol", "price")

symbol_codes <- c(
  Russia = "RUS", Kazakhstan = "KAZ", Ukraine = "UKR", Azerbaijan = "AZE"
)
petro_long <- petro_long[petro_long$symbol %in% names(symbol_codes), ]
petro_long$symbol <- unname(symbol_codes[as.character(petro_long$symbol)])

write.csv(petro_long, file = "petro_long.csv", quote = FALSE, row.names = FALSE)
The graphs could use a little more in the way of labeling, specifically on the time axis; since we care only about relative importance, the vertical axis doesn’t matter as much.