Installation: Get and install the Rdbi and Rdbi.PgSQL packages from BioConductor:
##### Non-Mac users: R # within R, simply do the next two steps (non-Mac users): rS="http://www.bioconductor.org/packages/2.0/bioc/" install.packages("Rdbi", repos=rS, dependencies=TRUE) install.packages("RdbiPgSQL", repos=rS, dependencies=TRUE) ##### Mac users: R # within R, simply do the next two steps (Mac users): rS="http://www.bioconductor.org/packages/2.0/bioc/" install.packages("Rdbi", repos=rS, type="source", dependencies=TRUE) install.packages("RdbiPgSQL", repos=rS, type="source", dependencies=TRUE)
Usage example:
library(Rdbi) library(RdbiPgSQL) # conn becomes an object which contains the DB connection: conn <- dbConnect(PgSQL(), host="pgserver.itc.it", dbname="mydatabase", user="myuser") # see if the connection works (should report the list of table(s) if table(s) are existing): dbListTables(conn)
Reading data from PostgreSQL:
# create an object which contains the SQL query: query <- dbSendQuery(conn, "select mycolumn from mytable") # fetch data according to query: mydata <- dbGetResult(query) # see the data structure: str(mydata) `data.frame': 7641 obs. of 1 variable: $ mycolumn: num 4.88 10.64 14.24 14.25 21.86 ... # another query: query <- dbSendQuery(conn, "select * from streams_length") mydata <- dbGetResult(query) str(mydata) 'data.frame': 99 obs. of 2 variables: $ cat : int 1 2 3 4 5 6 7 8 9 10 ... $ length: num 1273 496 392 134 208 ...
Other example: reading monthly meteo data (we'll use DateTime class)
library(Rdbi) library(RdbiPgSQL) conn <- dbConnect(PgSQL(), host="myserver.somewhere.org", dbname="zecche", user= "xxx") query <- dbSendQuery(conn, "SELECT * FROM pat_monthly_mean_90_99_view WHERE date < '01-01-1991' order by date,station") #NOTE if you use DateTime classes, better load as list month.mean.temp <- as.list(dbGetResult(query)) #update date from char to DateTime class, date will be stored as 'POSIXlt': month.mean.temp$date <- strptime(month.mean.temp$date, "%m-%d-%Y") str(month.mean.temp) names(month.mean.temp) <- c("east","north","quota","station","statid","date","tmonth","unid") str(month.mean.temp) month.mean.temp$date[1] month.mean.temp$date[length(month.mean.temp$date)] library(lattice) #xyplot only accepts POSIXct. So we change the list to a dataframe #(this will convert POSIXlt -> POSIXct): month.mean.temp <- as.data.frame(month.mean.temp) xyplot(tmonth ~ quota | as.character(station), data=month.mean.temp) xyplot(tmonth ~ date | as.character(station), data=month.mean.temp) bwplot(tmonth ~ date | as.character(station), data=month.mean.temp) #see only station 550: xyplot(tmonth ~ date | as.character(stazione), data=month.mean.temp[month.mean.temp$station==550,], type="l") # ^^^
Appending data to PostgreSQL table (the table must exist) Note the quotes!
dbAppendTable(conn, "table", mydata)
Writing data to PostgreSQL table (will create table if it doesn't exist). Note the quotes!
dbWriteTable(conn, mydata, "table")
Note that RODBC can be significantly slower.
To use it, in R, run:
install.packages("RODBC")
To use it, run:
library(RODBC) #establish connection: channel <- odbcConnect("zecche",uid="myusername",case="postgresql") #show tables sqlTables(channel, "zecche") #Read a table of an ODBC database into a data frame: Tsmallmammals <- sqlFetch(channel, "small_mammals") str(Tsmallmammals) #SQL query: query1 <- sqlQuery(channel, "select * from pat_tempariamassima1440 where date >= '2002-01-01' and date < '2002-02-01' order by date, stazione") str(query1) #update 'date' column to 'POSIXlt' datetime class: query1$date <- as.character(query1$date) query1$date <- strptime(query1$date, "%Y-%m-%d") #lattice plot: library(lattice) xyplot(valore ~ date | stazione, data=query1) bwplot(valore ~ date | stazione, data=query1) #select a single station: stazione200 <- subset(query1, query1$stazione==200) str(stazione200) plot(stazione200$date, stazione200$valore, type="l", m="Temp max") #close connection: odbcClose(channel)
Here a test: a table with 7512 obs. (rows) of 151 variables (columns). Method: I was copy-pasting all commands at the same time, so the second Sys.time() was issued immediately. Access from my machine via firewall to another machine outside the firewall. ------------------------------ Rdbi: > Sys.time() [1] "2003-11-04 14:25:46 CET" > query <- dbSendQuery(conn, "select * from small_mammals") > mydata <- dbGetResult(query) > Sys.time() [1] "2003-11-04 14:25:51 CET" -> 5 seconds ------------------------------ RODBC: > Sys.time() [1] "2003-11-04 14:33:06 CET" > query1 <- sqlQuery(channel, "select * from small_mammals") > Sys.time() [1] "2003-11-04 14:37:36 CET" -> 4:30 minutes
library(RSQLite) # define connection: drv <- dbDriver("SQLite") conn <- dbConnect(drv, dbname = "/home/user/database/dat/db_datos_cm.db") # check available tables: dbListTables(conn) # create an object which contains the SQL query: query <- dbSendQuery(conn, "SELECT * FROM mytable") # fetch data according to query: mydata <- fetch(query, n = -1) str(mydata)