トップ
新規
一覧
単語検索
最終更新
ヘルプ
ログイン
アールメカブ
R_回帰分析とbreakpoint
をテンプレートにして作成
開始行:
[[R_Baayen]]
#ref(baayen240.png,nowrap,center,nolink,時代の変曲点)
# "1994 年の新聞記事"の中で参照された時代を記録したデータ
# Preview the first three rows of the data.
head(faz, 3)

# Distance is simply the row index. seq_len() is used instead of
# 1:nrow(faz) so that a zero-row data frame does not produce c(1, 0).
faz$Distance <- seq_len(nrow(faz))
plot(log(faz$Distance), log(faz$Frequency + 1),
     xlab = "log Distance", ylab = "log Frequency")

# p.237
# Log-transform both variables; the + 1 maps a Frequency of 0 to 0
# rather than -Inf.
faz$LogFrequency <- log(faz$Frequency + 1)
faz$LogDistance <- log(faz$Distance)

# Shift the predictor so that the provisional breakpoint (Distance = 59)
# sits at 0 on the shifted scale.
breakpoint <- log(59)
faz$ShiftedLogDistance <- faz$LogDistance - breakpoint
plot(faz$ShiftedLogDistance, faz$LogFrequency,
     xlab = "log Shifted Distance",
     ylab = "log Frequency")

# Fit one regression line on each side of the breakpoint. Rows lying
# exactly at the breakpoint (shifted distance 0) enter both fits.
faz.left <- lm(LogFrequency ~ ShiftedLogDistance,
               data = faz[faz$ShiftedLogDistance <= 0, ])
faz.right <- lm(LogFrequency ~ ShiftedLogDistance,
                data = faz[faz$ShiftedLogDistance >= 0, ])

# Overlay both lines on the current plot (solid = left, dashed = right).
abline(faz.left, lty = 1)
abline(faz.right, lty = 2)
ここでLogFrequency ~ ShiftedLogDistance : PastBreakPoint は
最初の引数で求めた変数のスロープをもとに,データの後半部...
# Mark the observations that lie beyond the breakpoint
# (shifted distance strictly greater than 0).
faz$PastBreakPoint <- factor(faz$ShiftedLogDistance > 0)

# Interaction-only formula: one shared intercept, and a separate slope
# for ShiftedLogDistance within each level of PastBreakPoint.
faz.both <- lm(
  LogFrequency ~ ShiftedLogDistance:PastBreakPoint,
  data = faz
)

# Compare the two-slope model against a plain single-slope regression.
anova(
  faz.both,
  lm(LogFrequency ~ ShiftedLogDistance, data = faz)
)
# p.237
# Grid search over candidate breakpoints: for every position except the
# last row, refit the two-slope model and store its deviance (for lm,
# the residual sum of squares).
deviances <- numeric(nrow(faz) - 1)
# seq_along() rather than 1:(nrow(faz) - 1): with a single-row data frame
# the latter would iterate over c(1, 0) and evaluate log(0) = -Inf.
for (pos in seq_along(deviances)) {
  breakpoint <- log(pos)
  faz$ShiftedLogDistance <- faz$LogDistance - breakpoint
  faz$PastBreakPoint <- as.character(faz$ShiftedLogDistance > 0)
  faz.both <- lm(LogFrequency ~ ShiftedLogDistance:PastBreakPoint,
                 data = faz)
  deviances[pos] <- deviance(faz.both)
}
# p.239
# Position of the smallest deviance. which.min() always returns a single
# index (the first minimum), whereas which(deviances == min(deviances))
# can return several on ties, which would turn `breakpoint` into a vector
# that is silently recycled in the subtraction below.
best <- which.min(deviances)
best

# Refit the two-slope model at the best breakpoint.
breakpoint <- log(best)
faz$ShiftedLogDistance <- faz$LogDistance - breakpoint
faz$PastBreakPoint <- as.character(faz$ShiftedLogDistance > 0)
faz.both <- lm(LogFrequency ~ ShiftedLogDistance:PastBreakPoint,
               data = faz)

# 2 x 2 plotting grid on a white background.
par(mfrow = c(2, 2), bg = "white")

# Deviance as a function of the (log) candidate breakpoint.
plot(log(seq_along(deviances)), deviances, type = "l",
     xlab = "breakpoint", ylab = "deviance")

# Observed data in grey with the fitted values of the refit model on top.
plot(faz$LogDistance, faz$LogFrequency, type = "l",
     xlab = "Log Distance", ylab = "Log Frequency",
     col = "darkgrey")
lines(faz$LogDistance, fitted(faz.both))
終了行:
[[R_Baayen]]
#ref(baayen240.png,nowrap,center,nolink,時代の変曲点)
# "1994 年の新聞記事"の中で参照された時代を記録したデータ
# Preview the first three rows of the data.
head(faz, 3)

# Distance is simply the row index. seq_len() is used instead of
# 1:nrow(faz) so that a zero-row data frame does not produce c(1, 0).
faz$Distance <- seq_len(nrow(faz))
plot(log(faz$Distance), log(faz$Frequency + 1),
     xlab = "log Distance", ylab = "log Frequency")

# p.237
# Log-transform both variables; the + 1 maps a Frequency of 0 to 0
# rather than -Inf.
faz$LogFrequency <- log(faz$Frequency + 1)
faz$LogDistance <- log(faz$Distance)

# Shift the predictor so that the provisional breakpoint (Distance = 59)
# sits at 0 on the shifted scale.
breakpoint <- log(59)
faz$ShiftedLogDistance <- faz$LogDistance - breakpoint
plot(faz$ShiftedLogDistance, faz$LogFrequency,
     xlab = "log Shifted Distance",
     ylab = "log Frequency")

# Fit one regression line on each side of the breakpoint. Rows lying
# exactly at the breakpoint (shifted distance 0) enter both fits.
faz.left <- lm(LogFrequency ~ ShiftedLogDistance,
               data = faz[faz$ShiftedLogDistance <= 0, ])
faz.right <- lm(LogFrequency ~ ShiftedLogDistance,
                data = faz[faz$ShiftedLogDistance >= 0, ])

# Overlay both lines on the current plot (solid = left, dashed = right).
abline(faz.left, lty = 1)
abline(faz.right, lty = 2)
ここでLogFrequency ~ ShiftedLogDistance : PastBreakPoint は
最初の引数で求めた変数のスロープをもとに,データの後半部...
# Mark the observations that lie beyond the breakpoint
# (shifted distance strictly greater than 0).
faz$PastBreakPoint <- factor(faz$ShiftedLogDistance > 0)

# Interaction-only formula: one shared intercept, and a separate slope
# for ShiftedLogDistance within each level of PastBreakPoint.
faz.both <- lm(
  LogFrequency ~ ShiftedLogDistance:PastBreakPoint,
  data = faz
)

# Compare the two-slope model against a plain single-slope regression.
anova(
  faz.both,
  lm(LogFrequency ~ ShiftedLogDistance, data = faz)
)
# p.237
# Grid search over candidate breakpoints: for every position except the
# last row, refit the two-slope model and store its deviance (for lm,
# the residual sum of squares).
deviances <- numeric(nrow(faz) - 1)
# seq_along() rather than 1:(nrow(faz) - 1): with a single-row data frame
# the latter would iterate over c(1, 0) and evaluate log(0) = -Inf.
for (pos in seq_along(deviances)) {
  breakpoint <- log(pos)
  faz$ShiftedLogDistance <- faz$LogDistance - breakpoint
  faz$PastBreakPoint <- as.character(faz$ShiftedLogDistance > 0)
  faz.both <- lm(LogFrequency ~ ShiftedLogDistance:PastBreakPoint,
                 data = faz)
  deviances[pos] <- deviance(faz.both)
}
# p.239
# Position of the smallest deviance. which.min() always returns a single
# index (the first minimum), whereas which(deviances == min(deviances))
# can return several on ties, which would turn `breakpoint` into a vector
# that is silently recycled in the subtraction below.
best <- which.min(deviances)
best

# Refit the two-slope model at the best breakpoint.
breakpoint <- log(best)
faz$ShiftedLogDistance <- faz$LogDistance - breakpoint
faz$PastBreakPoint <- as.character(faz$ShiftedLogDistance > 0)
faz.both <- lm(LogFrequency ~ ShiftedLogDistance:PastBreakPoint,
               data = faz)

# 2 x 2 plotting grid on a white background.
par(mfrow = c(2, 2), bg = "white")

# Deviance as a function of the (log) candidate breakpoint.
plot(log(seq_along(deviances)), deviances, type = "l",
     xlab = "breakpoint", ylab = "deviance")

# Observed data in grey with the fitted values of the refit model on top.
plot(faz$LogDistance, faz$LogFrequency, type = "l",
     xlab = "Log Distance", ylab = "Log Frequency",
     col = "darkgrey")
lines(faz$LogDistance, fitted(faz.both))
ページ名: