title
Reproducible Research: Peer Assessment 1
output
Loading and preprocessing the data
library(dplyr )
library(xtable )
library(lattice )
library(lubridate )
# Load the raw activity log. Later chunks read the `steps`, `date` and
# `interval` columns from this data frame.
data <- read.csv("activity.csv")
What is mean total number of steps taken per day?
# Total steps recorded on each date. Missing readings are dropped by
# `na.rm = TRUE`, so a date with no valid readings gets a total of 0.
stepsPerDay <- data %>%
  group_by(date) %>%
  summarize(tot = sum(steps, na.rm = TRUE))

hist(
  stepsPerDay$tot,
  main = "Histogram of Steps per Day",
  xlab = "Steps per Day"
)

# Summary statistics of the daily totals (mean rounded to 2 decimals).
dtMean <- round(mean(stepsPerDay$tot), digits = 2)
dtMedian <- median(stepsPerDay$tot)
What is the average daily activity pattern?
# Average number of steps in each 5-minute interval, taken across all days
# and ignoring missing readings.
stepsPerTime <- data %>%
  group_by(interval) %>%
  summarize(avg = mean(steps, na.rm = TRUE))

# Line plot of the average daily activity pattern.
plot(
  stepsPerTime$interval,
  stepsPerTime$avg,
  type = "l",
  main = "Avg Steps per Interval",
  xlab = "5 min interval",
  ylab = "Avg Steps"
)

# Interval(s) whose average step count is the highest; kept as a one-column
# data frame holding `interval`.
dtMax <- stepsPerTime %>%
  filter(avg == max(avg)) %>%
  select(interval)
Interval with highest average steps: 835
To fill in the NA values, I used each interval's average number of steps (taken across all days).
A better method might be to compute the interval averages separately by day type (weekday vs. weekend).
# Rows whose step count is missing, and how many of them there are.
NArows <- data %>%
  filter(is.na(steps))
naRowCount <- count(NArows)

# Rows with an observed step count. `steps` is converted to numeric,
# presumably so its type matches the interval averages it is combined with
# later.
nonNArows <- data %>%
  filter(!is.na(steps))
nonNArows$steps <- as.numeric(nonNArows$steps)

# Replacement rows for the missing readings: drop the NA `steps` column and
# attach the interval's average from `stepsPerTime`. The join key is left
# implicit; the only column the two tables share is `interval`.
avgData <- NArows %>%
  select(-steps) %>%
  inner_join(stepsPerTime) %>%
  select(avg, date, interval)
## Joining by: "interval"
# Give the imputed rows the same column names as the observed rows (this
# renames `avg` to the first column name of `nonNArows`), then stack the two
# sets. `bind_rows()` is used rather than a set union because rows must never
# be de-duplicated when combining observations.
colnames(avgData) <- colnames(nonNArows)
fullData <- bind_rows(nonNArows, avgData)

# Daily totals recomputed on the completed data set.
stepsPerDayFull <- fullData %>%
  group_by(date) %>%
  summarize(tot = sum(steps, na.rm = TRUE))

hist(
  stepsPerDayFull$tot,
  main = "Histogram of Steps per Day w/ Avg for NAs",
  xlab = "Steps per Day"
)

# Summary statistics must come from the imputed totals (`stepsPerDayFull`),
# not the original `stepsPerDay`; otherwise they just repeat the earlier
# pre-imputation values.
dtMean <- round(mean(stepsPerDayFull$tot), digits = 2)
dtMedian <- median(stepsPerDayFull$tot)
Mean w/ Interval Avg for NAs: 9354.23
Median w/ Interval Avg for NAs: 10395
Are there differences in activity patterns between weekdays and weekends?
# Label every row as weekend or weekday, then average the steps per interval
# within each day type. Membership is tested with `%in%`: comparing with
# `== c("Sun", "Sat")` would recycle the two labels down the rows and
# misclassify weekend days.
# NOTE(review): the abbreviated day labels depend on the session locale;
# "Sun"/"Sat" assumes an English locale - confirm.
fullData <- fullData %>%
  mutate(
    dayType = ifelse(
      wday(as.Date(date), label = TRUE) %in% c("Sun", "Sat"),
      "Weekend",
      "Weekday"
    )
  ) %>%
  group_by(dayType, interval) %>%
  summarize(avg = mean(steps)) %>%
  ungroup() %>%
  arrange(interval)

# Two stacked panels (one per day type) of average steps by interval.
xyplot(
  avg ~ interval | dayType,
  fullData,
  type = "l",
  layout = c(1, 2),
  xlab = "Interval",
  ylab = "Average Steps",
  main = "Average Steps by Day Type"
)