# Plot journal posting activity from the entry counts in journal.db:
# a boxplot of entries per weekday and a 60-day moving average of
# entries per day.
#
# First, get daily counts out of sqlite (shell commands, run outside R):
#
#   printf 'year\tmonth\tday\tcount\n' > table
#   sqlite3 journal.db \
#     'select year, month, day, count(*) from entry group by year, month, day' \
#     | sed -e 's/|/\t/g' >> table
#
# The header is written tab-separated, like the data rows, because
# read.delim() splits fields on tabs.

# and read it into R; the columns are "year", "month", "day", and "count".
data <- read.delim("table")
stopifnot(
  all(c("year", "month", "day", "count") %in% names(data)),
  nrow(data) > 0
)

# create a vector of date objects. Columns are referenced through `data`
# instead of being attach()ed, so nothing else on the search path can
# shadow them.
dates <- as.Date(paste(data$year, data$month, data$day), format = "%Y %m %d")
# fail early with a clear message instead of a confusing seq() error below.
stopifnot(!anyNA(dates))

# unfortunately, this vector only has dates that have nonzero counts.
# so first create a vector of all dates in the range we care about:
alldates <- seq(min(dates), max(dates), by = "1 day")

# then merge these with the existing list, creating holes where
# we didn't already have a count.
# by = "date" means "join on the 'date' column",
# and all.y = TRUE means "put in holes when the second list
# has a value that the first doesn't".
d <- merge(
  data.frame(date = dates, count = data$count),
  data.frame(date = alldates),
  by = "date",
  all.y = TRUE
)

# now fill in the holes ("NA" values) with zeros:
d$count[is.na(d$count)] <- 0

# get the weekdays of these dates into an ordered factor.
# weekdays() returns names in the current locale, which would not match
# the English level names everywhere; "%u" gives the weekday number
# (1 = Monday ... 7 = Sunday) regardless of locale.
wd <- factor(
  format(d$date, "%u"),
  levels = as.character(1:7),
  labels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"),
  ordered = TRUE
)

# plot weekdays versus entry counts per day.
boxplot(d$count ~ wd, ylim = c(0, 15), main = "Posts per weekday")
# as you'd expect, weekends are different.

# plot average posts per day over a 60-day window.
# stats::filter() is named explicitly so that a filter() from an attached
# package (e.g. dplyr) cannot mask it.
plot(
  d$date, stats::filter(d$count, rep(1 / 60, 60)),
  type = "l",
  main = "Posts per Day",
  ylab = "average posts per day",
  xlab = "year"
)
lj data
-
blog moved
As described elsewhere, I've quit LiveJournal. If you're interested in my continuing posts, you should look at one of these (each contains feed…
-
dremel
They published a paper on Dremel, my favorite previously-unpublished tool from the Google toolchest. Greg Linden discusses it: "[...] it is capable…
-
treemaps
I finally wrote up my recent adventures in treemapping, complete with nifty clickable visualizations.
- Post a new comment
- 4 comments
- Post a new comment
- 4 comments