As far as readings go, there wasn’t much to include from the text in today’s post since I just went through a section that covered some basic proof techniques (induction, contradiction, etc.). Tomorrow will be somewhat similar since that section covers general data gathering and manipulation. So today I’ll go over some data I stumbled upon while looking for other texts on graph theory.
The Observatory of Economic Complexity
MIT’s Observatory of Economic Complexity hosts some neat data sets containing aggregate trade data for various commodities dating back to 1962. I was interested in looking at crude petroleum movements between countries in the most recent year available, 2014.
Creating a gexf file
Here’s the script I used to generate the gexf file that I imported into gephi. It’s pretty much self-contained and ought to run on your computer as-is, as long as you have the sqldf package installed. One improvement over previous code is that it fetches the data sets automatically, rather than having me point you to them somewhere in my post. I also discovered a useful function, basename(), which extracts the file name portion of a URL or file path.
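For example (this is base R, nothing extra to install), basename() just strips everything up to and including the last slash:

basename("http://atlas.media.mit.edu/static/db/raw/country_names.tsv.bz2")
## [1] "country_names.tsv.bz2"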
library(sqldf)

#source urls for datafiles
trade_url <- "http://atlas.media.mit.edu/static/db/raw/year_origin_destination_hs07_6.tsv.bz2"
countries_url <- "http://atlas.media.mit.edu/static/db/raw/country_names.tsv.bz2"

#extract filenames from urls
trade_filename <- basename(trade_url)
countries_filename <- basename(countries_url)

#download data
download.file(trade_url, destfile = trade_filename)
download.file(countries_url, destfile = countries_filename)

#import data into R
trade <- read.table(file = trade_filename, sep = '\t', header = TRUE)
country_names <- read.table(file = countries_filename, sep = '\t', header = TRUE)

#extract petroleum trade activity from 2014
petro_data <- trade[trade$year == 2014 & trade$hs07 == 270900, ]

#we want just the exports to avoid double counting
petr_exp <- petro_data[petro_data$export_val != "NULL", ]

#xxb doesn't seem to be a country, remove it
petr_exp <- petr_exp[petr_exp$origin != "xxb" & petr_exp$dest != "xxb", ]

#convert export value to numeric
petr_exp$export_val <- as.numeric(petr_exp$export_val)

#take the log of the export value to use as edge weight
petr_exp$export_log <- log(petr_exp$export_val)

petr_exp$origin <- as.character(petr_exp$origin)
petr_exp$dest <- as.character(petr_exp$dest)

#build edges (direction: exporter -> importer)
petr_exp$edgenum <- 1:nrow(petr_exp)
petr_exp$edges <- paste('<edge id="', as.character(petr_exp$edgenum),
                        '" source="', petr_exp$origin,
                        '" target="', petr_exp$dest,
                        '" weight="', petr_exp$export_log, '"/>', sep = "")

#build nodes
nodes <- data.frame(id = sort(unique(c(petr_exp$origin, petr_exp$dest))))
nodes <- sqldf("SELECT n.id, c.name FROM nodes n LEFT JOIN country_names c ON n.id = c.id_3char")
nodes$nodestr <- paste('<node id="', as.character(nodes$id),
                       '" label="', nodes$name, '"/>', sep = "")

#build metadata
gexfstr <- '<?xml version="1.0" encoding="UTF-8"?>
<gexf xmlns:viz="http://www.gexf.net/1.1draft/viz" version="1.1" xmlns="http://www.gexf.net/1.1draft">
<meta lastmodifieddate="2010-03-03+23:44">
<creator>Gephi 0.7</creator>
</meta>
<graph defaultedgetype="directed" idtype="string" type="static">'

#append nodes
gexfstr <- paste(gexfstr, '\n', '<nodes count="', as.character(nrow(nodes)), '">\n', sep = "")
fileConn <- file("exports_log_norev.gexf")
for(i in 1:nrow(nodes)){
  gexfstr <- paste(gexfstr, nodes$nodestr[i], "\n", sep = "")
}
gexfstr <- paste(gexfstr, '</nodes>\n', '<edges count="', as.character(nrow(petr_exp)), '">\n', sep = "")

#append edges and print to file
for(i in 1:nrow(petr_exp)){
  gexfstr <- paste(gexfstr, petr_exp$edges[i], "\n", sep = "")
}
gexfstr <- paste(gexfstr, '</edges>\n</graph>\n</gexf>', sep = "")
writeLines(gexfstr, fileConn)
close(fileConn)
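As a quick sanity check on the data (this isn’t part of the script above, just something you can run afterwards in the same session), you can total up export values by origin country and see which exporters come out on top:

#total 2014 crude export value by origin country, largest first
totals <- aggregate(export_val ~ origin, data = petr_exp, FUN = sum)
totals <- totals[order(-totals$export_val), ]
head(totals, 10)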
Generating the graph
After importing the gexf file, sizing the nodes by eigenvector centrality, and applying some community detection, gephi produced the following result:
Try clicking on the graph – you can zoom in quite a bit to see the countries and edges in detail. I’ve set the graph so that edge width is proportional to the log of the export value, so the higher the trade value between two countries, the thicker the edge. We can also see that each community is highlighted in its own color – we would intuitively associate these with trading blocs, or groups of countries that trade closely with one another.
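A note on that log transform: the pairwise export values presumably span many orders of magnitude, so using them directly as edge weights would make all but the largest flows effectively invisible. If you want to check the spread yourself, using the petr_exp data frame from the script above:

#compare the spread of the raw export values with their logs
summary(petr_exp$export_val)
summary(petr_exp$export_log)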
In this graph, the node size is proportional to eigenvector centrality. In other words, the larger the node, the more important the country is to the network. To me, this was kind of puzzling: I would have thought that major exporting nations like Saudi Arabia would appear much larger on the graph. However, you can see from the image that countries associated with importing oil dominate the graph.
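gephi does this calculation internally, but if you want to look at the actual centrality scores, the igraph package can compute eigenvector centrality straight from the edge list built above. This is just a sketch: it assumes you have igraph installed, and I haven’t verified that its defaults match whatever gephi does under the hood.

library(igraph)

#build a directed graph from the origin/dest columns,
#carrying the log export value along as an edge attribute
g <- graph_from_data_frame(petr_exp[, c("origin", "dest", "export_log")],
                           directed = TRUE)

#eigenvector centrality, weighting edges by the log export value
ec <- eigen_centrality(g, directed = TRUE, weights = E(g)$export_log)

#ten most central countries under this measure
head(sort(ec$vector, decreasing = TRUE), 10)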
I thought maybe it had to do with the direction of the edges. What we have here is a directed graph – if you look carefully, you can see that the edges are actually arrows that point from the exporting country to the importing country. If we reverse the direction of these arrows – that is, recreate the graph from the perspective of money flowing into exporting countries rather than goods flowing out of them – we get the following graph:
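The simplest way I can think of to build that reversed edge list is to swap the origin and dest columns before generating the edge strings and then re-run the rest of the script on the flipped copy (petr_rev is just a throwaway name for it):

#flip each edge so it points from importer back to exporter,
#i.e. follow the money rather than the oil
petr_rev <- petr_exp
petr_rev$origin <- petr_exp$dest
petr_rev$dest <- petr_exp$origin
#re-running the edge-, node-, and gexf-building code on petr_rev
#instead of petr_exp produces the reversed graph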
This graph is a little more consistent with my intuition – we can see that major exporting nations like Saudi Arabia, Iraq, and Azerbaijan appear much larger, while importing nations appear smaller. However, I have to caution myself that just because the graph is consistent with my belief doesn’t mean I’m right. I’ll have to see if I can come to a better understanding of centrality as I continue with the course.