increasingly popular statistical programming language R for data science. R is popular for statistical computing and statistical analysis.
R is an open-source language and is relatively easy to learn. R can handle huge data sets and has incredible data visualization and graphics capabilities. With R, data analysis is better and faster, and users can make better visualizations.
Write the code below in the RStudio code editor or the R console.
# Install a package; "package_name" is a placeholder for the name of the
# package you want to install.
install.packages("package_name")
# Sequences ----
1:50  # print the numbers 1 to 50 to the console
50:1  # print the numbers 50 to 1 (reverse order) to the console

# Sequences continued...
# seq(from = 1, to = 1, by = ((to - from) / (length.out - 1)),
#     length.out = NULL, along.with = NULL, ...)
seq(7, 49, 7)  # from 7 to 49 in steps of 7

# For more parameters use ?function_name. Example:
?seq

y <- c(3, 5, 8, 1, 2)

# Examining variables ----
class(y)   # class of y (e.g. character, numeric or logical)
length(y)  # number of elements in y

# Factor variables ----
# levels() only has a value for factors; on the plain numeric vector y it
# would return NULL, so convert to a factor first.
levels(factor(y))  # the distinct values of y as factor levels

# table function ----
x <- c("a", "a", "b", "b", "b", "c")
table(x)  # frequency count of each distinct value in x
# While loop: print the numbers 1 to 5
i <- 1
while (i < 6) {
  print(i)
  i <- i + 1  # advance the counter so the loop terminates
}
# if...else statement: report whether x is positive
x <- -14
if (x > 0) {
  print("positive x")
} else {
  print("not positive x")
}
# For loop: print the elements of x that sit at odd positions
# (x is 1:100, so these are the odd numbers 1, 3, ..., 99)
x <- 1:100
for (i in seq_along(x)) {
  if (i %% 2 != 0) {
    print(x[i])
  }
}
# Print the arithmetic mean of a numeric vector.
#
# k: numeric vector.
# Prints sum(k) / length(k); print() also hands that value back invisibly,
# so the result can still be captured, e.g. m <- y(c(2, 4, 6)).
y <- function(k) {
  total <- sum(k)
  avg <- total / length(k)
  print(avg)
}
# Numeric vectors
y <- c(20, 20, 60)

# Character vectors
datascientist <- c("Programming language", "statistics", "domain knowledge")

# Connecting numeric and character vectors: label each number with a name
names(y) <- datascientist
y

# Subsetting vectors: the first two elements
y[1:2]
# Data frame
df <- data.frame(x = 1:2, y = 3:4)
is.na(df)       # logical matrix: TRUE where a value is NA, FALSE otherwise
sum(is.na(df))  # total number of NA (missing) values in df

# dplyr
library(dplyr)       # data manipulation library
data("ToothGrowth")  # load built-in data set
View(ToothGrowth)    # view data set
?ToothGrowth         # open the help page for the data set
# Bar plot: one bar per observation of sepal length in the iris data set
barplot(iris$Sepal.Length)
# Scatterplots
library(ggplot2)
# Inside aes() refer to columns by bare name (Species, not iris$Species) so
# the mapping is evaluated against the data passed to ggplot().
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
  geom_point()
# Histograms: distribution of sepal length in the iris data set
hist(iris$Sepal.Length)
# Boxplots: horizontal boxplot of sepal length in the iris data set
# (TRUE is spelled out because T is an ordinary variable that can be reassigned)
boxplot(iris$Sepal.Length, horizontal = TRUE)
# Pie chart: share of observations per species in the iris data set
pie(table(iris$Species))
# Read the comma-separated file "rainfall.csv", treating its first row as
# column names. The resulting data frame is not assigned, so it is only
# printed to the console.
read.csv(file="rainfall.csv", header = TRUE, sep=",")
# SPSS files ----
install.packages("foreign")
library(foreign)  # reads data files written by other statistical software
indata <- read.spss("election2020.sav")
indataframe <- as.data.frame(indata)  # convert the imported data to a data frame
str(indataframe)
summary(indataframe)

# SAS files ----
install.packages("sas7bdat")  # or install Hmisc
library(sas7bdat)
data(sas7bdat.sources)
install.packages("RODBC")
library(RODBC)
odbcDataSources()  # list the ODBC data sources known to this machine
getwd()            # show the current working directory

# Open the Access database A.mdb. A full ODBC connection string (driver plus
# DBQ) must go through odbcDriverConnect(); odbcConnectAccess() expects only
# the path of the database file and builds the connection string itself.
myconn <- odbcDriverConnect(
  "Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=A.mdb"
)

# Fetch the whole "Location" table as a data frame and print it
Test <- sqlFetch(myconn, "Location")
Test

# Release the database connection once the data has been read
odbcClose(myconn)
install.packages("RCurl")
library(RCurl)
# Download the content at a URL as text; "site_url" is a placeholder for the
# address to fetch.
data2 <- getURL("site_url")
# Load csv file from local system
mba <- read.csv("mba.csv")

# Measures of central tendency ----
mean(mba$gmat)
median(mba$gmat)

# Mode: the most frequent value of x. On ties, the value that appears first
# in x wins (unique() keeps first-appearance order, which.max() takes the
# first maximum).
getmode <- function(x) {
  uniquv <- unique(x)
  uniquv[which.max(tabulate(match(x, uniquv)))]
}
getmode(mba$gmat)

# Measures of dispersion ----
var(mba$gmat)
sd(mba$gmat)
range(mba$gmat)  # minimum and maximum

# Width of the range: maximum minus minimum
rangevalue <- function(x) {
  max(x) - min(x)
}
rangevalue(mba$gmat)

# Measures of skewness ----
install.packages("moments")
library(moments)
skewness(mba$gmat)

# Measures of kurtosis ----
kurtosis(mba$gmat)

# Graphical representation ----
# Boxplot
boxplot(mba$gmat, horizontal = TRUE)
# Histogram
hist(mba$gmat)
# Barplot
barplot(mba$gmat)

str(mba)
mba$datasrno <- as.factor(mba$datasrno)

# install.packages("psych")
library(psych)
describe(mba)

# Z scores (two-sided critical values) ----
qnorm(0.950)  # 90%
qnorm(0.975)  # 95%
qnorm(0.995)  # 99%

# t score ----
qt(0.950, 772)  # 90%, with 772 degrees of freedom

# Q-Q plot ----
qqnorm(mba$gmat)
qqline(mba$gmat)
# Simple linear regression ----
# Model Salary as a linear function of YearsExperience.
Salary_hike_Model <- lm(Salary ~ YearsExperience, data = Salary_hike)
summary(Salary_hike_Model)
plot(Salary_hike_Model)  # diagnostic plots for the fitted model

# Multiple linear regression ----
# Rows 1441 and 1701 of ComputerData are excluded from the fit.
model3 <- lm(
  price ~ speed + hd + ram + screen + ads + trend + premium,
  data = ComputerData[-c(1441, 1701), ]
)
summary(model3)
# avPlots() comes from the car package, which this script never attaches, so
# call it with an explicit namespace.
car::avPlots(model3)
Decision tree model: use the C50 library. First install or update the C50 package with install.packages("C50"). Then set the predicted (target) variable and the independent variables, and build the model.
install.packages("C50")
library(C50)
# Fit a C5.0 decision tree: every column except the first is a predictor and
# company_sales is the outcome.
# NOTE(review): presumably company_sales is the first column of
# company_train, otherwise the outcome is also among the predictors; confirm.
company_train_model <- C5.0(company_train[, -1], company_train$company_sales)
plot(company_train_model)
library(randomForest)
# Fit a random forest predicting Sales from all other columns of
# company_train; na.roughfix is used as the missing-value handler and
# variable importance is recorded.
company_train_model <- randomForest(
  Sales ~ .,
  data = company_train,
  na.action = na.roughfix,
  importance = TRUE
)
company_train_model
plot(company_train_model)
# Predictions on the same data the model was trained on
company_train_pred <- predict(company_train_model, company_train)
R is an open-source programming language with a large community. R has 10,000+ packages. R is mostly used for visualization and statistical analysis. R is easier to learn than many other languages.
Comments
Jenniffer Connarton,
02 Apr 2022: Excellent, stick with it!