R_回帰分析とbreakpoint
# Data recording the time periods referenced in "newspaper articles from 1994".
# NOTE(review): `faz` is not created in this script; presumably it is loaded
# beforehand (e.g. from a package data set) -- confirm.
head(faz, 3)

# Distance is simply the 1-based row position of each observation.
# seq_len() (rather than 1:nrow()) stays well-defined for an empty data frame.
faz$Distance <- seq_len(nrow(faz))
plot(
  log(faz$Distance), log(faz$Frequency + 1),
  xlab = "log Distance", ylab = "log Frequency"
)

# p.237
# Work on the log-log scale; adding 1 keeps log() finite for a Frequency of 0.
faz$LogFrequency <- log(faz$Frequency + 1)
faz$LogDistance <- log(faz$Distance)

# Fixed candidate breakpoint at Distance = 59. Subtracting it shifts the
# x-axis so that the breakpoint sits at 0.
breakpoint <- log(59)
faz$ShiftedLogDistance <- faz$LogDistance - breakpoint
plot(
  faz$ShiftedLogDistance, faz$LogFrequency,
  xlab = "log Shifted Distance", ylab = "log Frequency"
)

# Fit one regression line on each side of the breakpoint. Both subsets use an
# inclusive comparison, so an observation lying exactly on the breakpoint
# (shifted distance 0) contributes to both fits.
faz.left <- lm(
  LogFrequency ~ ShiftedLogDistance,
  data = faz[faz$ShiftedLogDistance <= 0, ]
)
faz.right <- lm(
  LogFrequency ~ ShiftedLogDistance,
  data = faz[faz$ShiftedLogDistance >= 0, ]
)

# Overlay both lines on the current plot: solid = left fit, dashed = right fit.
abline(faz.left, lty = 1)
abline(faz.right, lty = 2)
ここで LogFrequency ~ ShiftedLogDistance : PastBreakPoint は,最初の引数で求めた変数のスロープをもとに,データの後半部分(ShiftedLogDistance > 0,すなわち PastBreakPoint が TRUE の部分)のスロープを求めることを意味する.(Baayen, p.354:6:5)
# Flag the observations that lie strictly past the current breakpoint
# (shifted log distance > 0) and fit a single model in which the slope of
# ShiftedLogDistance is allowed to differ between the two groups.
faz$PastBreakPoint <- as.factor(faz$ShiftedLogDistance > 0)
faz.both <- lm(LogFrequency ~ ShiftedLogDistance:PastBreakPoint, data = faz)

# Compare the two-slope model against the plain single-slope regression.
anova(faz.both, lm(LogFrequency ~ ShiftedLogDistance, data = faz))

# p.237
# Scan every candidate breakpoint position 1 .. nrow(faz) - 1, refit the
# two-slope model for each, and record its deviance.
n_candidates <- max(nrow(faz) - 1L, 0L)
deviances <- numeric(n_candidates)
for (pos in seq_len(n_candidates)) {
  breakpoint <- log(pos)
  faz$ShiftedLogDistance <- faz$LogDistance - breakpoint
  # The flag is stored as character here; lm() converts it to a factor.
  faz$PastBreakPoint <- as.character(faz$ShiftedLogDistance > 0)
  faz.both <- lm(LogFrequency ~ ShiftedLogDistance:PastBreakPoint, data = faz)
  deviances[pos] <- deviance(faz.both)
}

# p.239
# Pick the position with the smallest deviance. which.min() always yields a
# single index (the first one on a tie) and avoids comparing floating-point
# values with `==`, so `breakpoint` below is guaranteed to be a scalar.
best <- which.min(deviances)
best

# Refit the two-slope model at the best breakpoint.
breakpoint <- log(best)
faz$ShiftedLogDistance <- faz$LogDistance - breakpoint
faz$PastBreakPoint <- as.character(faz$ShiftedLogDistance > 0)
faz.both <- lm(LogFrequency ~ ShiftedLogDistance:PastBreakPoint, data = faz)

# 2 x 2 plotting grid on a white background (changes the graphics state for
# any plots drawn afterwards as well).
par(mfrow = c(2, 2), bg = "white")

# Panel 1: deviance as a function of the (log) candidate breakpoint.
plot(
  log(seq_along(deviances)), deviances,
  type = "l", xlab = "breakpoint", ylab = "deviance"
)

# Panel 2: observed log frequencies (grey) with the fitted values of the
# best two-slope model drawn on top.
plot(
  faz$LogDistance, faz$LogFrequency,
  type = "l", xlab = "Log Distance", ylab = "Log Frequency",
  col = "darkgrey"
)
lines(faz$LogDistance, fitted(faz.both))
添付ファイル: baayen240.png 666件
[詳細]
Last-modified: 2007-10-12 (金) 12:44:44 (6183d)