R_回帰分析とbreakpoint のバックアップ(No.1) - アールメカブ

アールメカブ


R_回帰分析とbreakpoint のバックアップ(No.1)


R_Baayen

head(faz, 3) # "1994 年の新聞記事"の中で参照された時代を記録したデータ faz$Distance <- 1:nrow(faz)

plot(log(faz$Distance), log(faz$Frequency + 1), xlab = "log Distance", ylab = "log Frequency")

# p.237 faz$LogFrequency? <- log(faz$Frequency + 1) faz$LogDistance? <- log(faz$Distance) breakpoint <- log(59)

faz$ShiftedLogDistance? <- faz$LogDistance? - breakpoint plot(faz$ShiftedLogDistance?, faz$LogFrequency?, xlab = "log Shifted Distance", ylab = "log Frequnecy")

faz.left <- lm(LogFrequency? ~ ShiftedLogDistance?, data = faz[faz$ShiftedLogDistance? <= 0,])

faz.right <- lm(LogFrequency? ~ ShiftedLogDistance?, data = faz[faz$ShiftedLogDistance? >= 0,])

abline(faz.left, lty = 1) abline(faz.right, lty = 2)

faz$PastBreakPoint? <- as.factor(faz$ShiftedLogDistance? > 0) faz.both <- lm(LogFrequency? ~ ShiftedLogDistance? : PastBreakPoint?, data = faz) anova(faz.both, lm(LogFrequency? ~ ShiftedLogDistance?, data = faz))

# p.237 deviances <- rep(0, nrow(faz) - 1)

for(pos in 1:(nrow(faz) - 1)){

breakpoint <- log(pos)

faz$ShiftedLogDistance? <- faz$LogDistance? - breakpoint

faz$PastBreakPoint? <- as.character(faz$ShiftedLogDistance? > 0)

faz.both <- lm(LogFrequency? ~ ShiftedLogDistance?:PastBreakPoint?,

data = faz)

 deviances[pos] <- deviance(faz.both)

}

# p.239

best <- which(deviances == min(deviances)) best breakpoint <- log(best) faz$ShiftedLogDistance? <- faz$LogDistance? - breakpoint faz$PastBreakPoint? <- as.character(faz$ShiftedLogDistance? > 0) faz.both <- lm(LogFrequency? ~ ShiftedLogDistance?:PastBreakPoint?,

data = faz)

par(mfrow = c(2,2), bg = "white")

plot(log(1:length(deviances)), deviances, type = "l", xlab = "breakpoint",ylab = "deviance") plot( faz$LogDistance?, faz$LogFrequency?, type = "l", xlab = "Log Distance",ylab = "Log Frequency", col = "darkgrey") lines(faz$LogDistance?, fitted(faz.both))