2010-01-29 22 views
11

Come si crea un grafico comeCome creare un grafico "inkblot" con R?

http://junkcharts.typepad.com/junk_charts/2010/01/leaving-ink-traces.html

dove diverse serie temporali (uno per paese) vengono visualizzati in senso orizzontale come aree simmetriche?

Penso che se riesco a visualizzare una serie temporale in questo modo, è facile generalizzare a diversi utilizzando mfrow.

dati del campione:

#Solar energy production in Europe, by country (EC),(1 000 toe) 
Country,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007 
Belgium,1,1,1,1,1,1,2,2,3,3,3,5 
Bulgaria,-,-,-,-,-,-,-,-,-,-,-,- 
Czech Republic,0,0,0,0,0,0,0,0,2,2,3,4 
Denmark,6,7,7,8,8,8,9,9,9,10,10,11 
Germany (including ex-GDR from 1991),57,70,83,78,96,150,184,216,262,353,472,580 
Estonia,-,-,-,-,-,-,-,-,-,-,-,- 
Ireland,0,0,0,0,0,0,0,0,0,0,1,1 
Greece,86,89,93,97,99,100,99,99,101,102,109,160 
Spain,26,23,26,29,33,38,43,48,58,65,83,137 
France,15,16,17,18,26,19,19,18,19,22,29,37 
Italy,8,9,11,11,12,14,16,18,21,30,38,56 
Cyprus,32,33,34,35,35,34,35,36,40,41,43,54 
Latvia,-,-,-,-,-,-,-,-,-,-,-,- 
Lithuania,-,-,-,-,-,-,-,-,-,-,-,- 
Luxembourg (Grand-Duché),0,0,0,0,0,0,0,0,1,2,2,2 
Hungary,0,0,0,0,0,1,2,2,2,2,2,3 
Netherlands,6,7,8,10,12,14,16,19,20,22,22,23 
Austria,42,48,55,58,64,69,74,80,86,92,101,108 
Poland,0,0,0,0,0,0,0,0,0,0,0,0 
Portugal,16,16,17,18,18,19,20,21,21,23,24,28 
Romania,0,0,0,0,0,0,0,0,0,0,0,0 
Slovenia,-,-,-,-,-,-,-,-,-,-,-,- 
Slovakia,0,0,0,0,0,0,0,0,0,0,0,0 
Finland,0,0,0,0,1,1,1,1,1,1,1,1 
Sweden,4,4,5,5,5,6,4,5,5,6,6,9 
United Kingdom,6,6,7,7,11,13,16,20,25,30,37,46 
Croatia,0,0,0,0,0,0,0,0,0,0,0,1 
Turkey,159,179,210,236,262,287,318,350,375,385,402,420 
Iceland,-,-,-,-,-,-,-,-,-,-,-,- 
Norway,0,0,0,0,0,0,0,0,0,0,0,0 
Switzerland,18,19,21,23,24,26,23,24,25,26,28,30 
#-='Not applicable' or 'Real zero' or 'Zero by default' :=Not available " 
#Source of Data:,Eurostat, http://spreadsheets.google.com/ccc?key=0Agol553XfuDZdFpCQU1CUVdPZ3M0djJBSE1za1NGV0E&hl=en_GB 
#Last Update:,30.04.2009 
#Date of extraction:,17 Aug 2009 07:41:12 GMT, http://epp.eurostat.ec.europa.eu/tgm/table.do?tab=table&init=1&plugin=1&language=en&pcode=ten00082 
+0

ho notato oggi c'è una plotrix fresco :: funzione kiteChart –

risposta

11

È possibile utilizzare polygon nella grafica di base, per esempio

x <- seq(as.POSIXct("1949-01-01", tz="GMT"), length=36, by="months") 
y <- rnorm(length(x)) 
plot(x, y, type="n", ylim=c(-1,1)*max(abs(y))) 
polygon(c(x, rev(x)), c(y, -rev(y)), col="cornflowerblue", border=NA) 

Aggiornamento: Usando panel.polygon da lattice:

library("lattice") 
library("RColorBrewer") 

df <- data.frame(x=rep(x,3), 
       y=rnorm(3*length(x)), 
       variable=gl(3, length(x))) 

p <- xyplot(y~x|variable, data=df, 
      ylim=c(-1,1)*max(abs(y)), 
      layout=c(1,3), 
      fill=brewer.pal(3, "Pastel2"), 
      panel=function(...) { 
       args <- list(...) 
       print(args) 
       panel.polygon(c(args$x, rev(args$x)), 
          c(args$y, -rev(args$y)), 
          fill=args$fill[panel.number()], 
          border=NA) 
      }) 
print(p) 
+0

Questo è veramente bello. Potresti approfondire come impilare più serie temporali insieme? –

+0

Vedere la mia risposta aggiornata ... – rcs

+0

Sembra molto bello! –

11

Con un lit lucidatura TLE, questa soluzione ggplot sarà simile a quello che si vuole:

alt text http://www.imagechicken.com/uploads/1264790429056858700.png

Ecco come farlo dai dati:

require(ggplot2) 

In primo luogo, prendiamo i dati di input e di importazione e ristrutturarla in una forma ggplot piace:

rdata = read.csv("data.csv", 
# options: load '-' as na, ignore first comment line #Solar, 
# strip whitespace that ends line, accept numbers as col headings 
    na.strings="-", skip=1, strip.white=T, check.names=F) 
# Convert to long format and check years are numeric 
data = melt(rdata) 
data = transform(data,year=as.numeric(as.character(variable))) 
# geom_ribbon hates NAs. 
data = data[!is.na(data$value),] 

> summary(data) 
      Country  variable  value    year  
Austria  : 12 1996 : 25 Min. : 0.00 Min. :1996 
Belgium  : 12 1997 : 25 1st Qu.: 0.00 1st Qu.:1999 
Croatia  : 12 1998 : 25 Median : 7.00 Median :2002 
Cyprus  : 12 1999 : 25 Mean : 36.73 Mean :2002 
Czech Republic: 12 2000 : 25 3rd Qu.: 30.00 3rd Qu.:2004 
Denmark  : 12 2001 : 25 Max. :580.00 Max. :2007 
(Other)  :228 (Other):150 

Ora diamo tracciarla:

012.
ggplot(data=data, aes(fill=Country)) + 
    facet_grid(Country~.,space="free", scales="free_y") + 
    opts(legend.position="none") + 
    geom_ribbon(aes(x=year,ymin=-value,ymax=+value)) 
+0

Ottima soluzione! Questo mostra quanto sia potente ggplot2, quando viene usato da mani abili. –

5

Utilizzando primo approccio RCS, qui una soluzione per i dati di esempio con una grafica di base:

rawData <- read.csv("solar.csv", na.strings="-") 
data <- ts(t(as.matrix(rawData[,2:13])), names=rawData[,1], start=1996) 

inkblot <- function(series, col=NULL, min.height=40, col.value=24, col.category=17, ...) { 
    # assumes non-negative values 
    # assumes that series is multivariate series 
    # assumes that series names are set, i.e. colnames(series) != NULL 

    x <- as.vector(time(series)) 
    if(length(col)==0){ 
    col <- rainbow(dim(series)[2]) 
    } 

    ytotal <- 0 
    for(category in colnames(series)) { 
    y <- series[, category] 
    y <- y[!is.na(y)] 
    ytotal <- ytotal + max(y, min.height) 
    } 

    oldpar = par(no.readonly = TRUE) 
    par(mar=c(2,3,0,10)+0.1, cex=0.7) 

    plot(x, 1:length(x), type="n", ylim=c(0,1)*ytotal, yaxt="n", xaxt="n", bty="n", ylab="", xlab="", ...) 
    axis(side=1, at=x) 

    catNumber <- 1 
    offset <- 0 
    for(category in rev(colnames(series))) { 
    print(paste("working on: ", category)) 
    y <- 0.5 * as.vector(series[,category]) 
    offset <- offset + max(max(abs(y[!is.na(y)])), 0.5*min.height) 
    print(paste("offset= ", str(offset))) 
    polygon(c(x, rev(x)), c(offset+y, offset-rev(y)), col=col[catNumber], border=NA) 
    mtext(text=y[1], side=2, at=offset, las=2, cex=0.7, col=col.value) 
    mtext(text=y[length(y)], side=4, line=-1, at=offset, las=2, cex=0.7, col=col.value) 
    mtext(text=category, side=4, line=2, at=offset, las=2, cex=0.7, col=col.category) 
    offset <- offset + max(max(abs(y[!is.na(y)])), 0.5*min.height) 
    catNumber <- catNumber + 1 
    } 
} 


inkblot(data) 

devo ancora capire le linee verticali della griglia e la colorazione trasparente. plot

+0

Nella tua funzione plot hai bisogno del valore 'y' che non esiste. E il calcolo del 'max' in' ytotal' fallisce sui valori di 'NA'. – Marek

+0

Grazie per aver segnalato questo. Ho corretto il codice di conseguenza. –

5

In ritardo a questo gioco, ma ho creato un stacked "blot" chart using ggplot2 and another set of data. Questo utilizza geom_polygon dopo che i dati sono stati appianati.

# data: Masaaki Ishida ([email protected]) 
# http://luna.pos.to/whale/sta.html 

head(blue, 2) 
##  Season Norway U.K. Japan Panama Denmark Germany U.S.A. Netherlands 
## ## [1,] 1931  0 6050  0  0  0  0  0   0 
## ## [2,] 1932 10128 8496  0  0  0  0  0   0 
## ##  U.S.S.R. South.Africa TOTAL 
## ## [1,]  0   0 6050 
## ## [2,]  0   0 18624 

hourglass.plot <- function(df) { 
    stack.df <- df[,-1] 
    stack.df <- stack.df[,sort(colnames(stack.df))] 
    stack.df <- apply(stack.df, 1, cumsum) 
    stack.df <- apply(stack.df, 1, function(x) sapply(x, cumsum)) 
    stack.df <- t(apply(stack.df, 1, function(x) x - mean(x))) 
    # use this for actual data 
    ## coords.df <- data.frame(x = rep(c(df[,1], rev(df[,1])), times = dim(stack.df)[2]), y = c(apply(stack.df, 1, min), as.numeric(apply(stack.df, 2, function(x) c(rev(x),x)))[1:(length(df[,1])*length(colnames(stack.df))*2-length(df[,1]))]), id = rep(colnames(stack.df), each = 2*length(df[,1]))) 

    ## qplot(x = x, y = y, data = coords.df, geom = "polygon", color = I("white"), fill = id) 

    # use this for smoothed data 
    density.df <- apply(stack.df, 2, function(x) spline(x = df[,1], y = x)) 
    id.df <- sort(rep(colnames(stack.df), each = as.numeric(lapply(density.df, function(x) length(x$x))))) 
    density.df <- do.call("rbind", lapply(density.df, as.data.frame)) 
    density.df <- data.frame(density.df, id = id.df) 
    smooth.df <- data.frame(x = unlist(tapply(density.df$x, density.df$id, function(x) c(x, rev(x)))), y = c(apply(unstack(density.df[,2:3]), 1, min), unlist(tapply(density.df$y, density.df$id, function(x) c(rev(x), x)))[1:(table(density.df$id)[1]+2*max(cumsum(table(density.df$id))[-dim(stack.df)[2]]))]), id = rep(names(table(density.df$id)), each = 2*table(density.df$id))) 

    qplot(x = x, y = y, data = smooth.df, geom = "polygon", color = I("white"), fill = id) 
} 

hourglass.plot(blue[,-12]) + opts(title = c("Blue Whale Catch")) 

alt text http://probabilitynotes.files.wordpress.com/2010/06/bluewhalecatch.png

Problemi correlati