I want to download data from the web, but my code is too long and error-prone. Is there any way to use a loop for the web links? The only value that changes is the week number.
Small example from my code:
library(XML)

# Download the weekly matchup tables and combine them into one data frame.
# Each page returns a list of tables; table [[4]] holds the matchup stats
# (per the original code's indexing).
base_url <- "http://baseball.fantasysports.yahoo.com/b1/2276/matchup"

# Fetch all matchup tables for one week and tag them with the week number.
# Matchup ids come in consecutive pairs: (1,2), (3,4), ..., (11,12).
fetch_week <- function(week) {
  tables <- lapply(seq(1, 11, by = 2), function(mid1) {
    url <- paste0(base_url, "?week=", week,
                  "&mid1=", mid1, "&mid2=", mid1 + 1)
    readHTMLTable(doc = url)[[4]]
  })
  week_data <- do.call(rbind, tables)
  week_data$week <- week  # add number of week
  week_data
}

# complete table for weeks 1-3 (extend the range to add more weeks)
mlb.complet <- do.call(rbind, lapply(1:3, fetch_week))
This should work. Note that each link returns a list of several tables, so you need to pick out the one you want after calling the readHTMLTable function.
# Build one combined data frame over all weeks and matchup pairs.
# Fixes to the original snippet:
#   - `doc =` belongs to readHTMLTable, not paste0 (it was silently treated
#     as just another string piece by paste0's `...`).
#   - a stray "2" was concatenated after `week`, producing week=12, week=22
#     instead of week=1, week=2.
#   - mid1 was hard-coded to 1; matchups come in pairs (id - 1, id).
#   - weeks run 1:3 to match the question's data.
output <-
  do.call(rbind,
          lapply(1:3, function(week) {
            do.call(rbind,
                    lapply(seq(2, 12, 2), function(id) {
                      url <- paste0(
                        "http://baseball.fantasysports.yahoo.com/b1/2276/matchup",
                        "?week=", week, "&mid1=", id - 1, "&mid2=", id
                      )
                      x <- readHTMLTable(doc = url)
                      # choose which table to keep
                      res <- x$statTable3
                      res$WEEK <- week
                      res$ID <- id
                      res
                    }))
          }))
Collected from the Internet
Please contact [email protected] to delete if infringement.
Comments