# Load the data set.
# Python equivalent:
#   import seaborn as sns
#   df1 = sns.load_dataset('iris')
# In R, `iris` ships with the built-in `datasets` package, so a plain
# assignment is enough.
df1 <- datasets::iris
# Evaluating the bare name at top level displays the data frame.
df1
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <fct> |
5.1 | 3.5 | 1.4 | 0.2 | setosa |
4.9 | 3.0 | 1.4 | 0.2 | setosa |
4.7 | 3.2 | 1.3 | 0.2 | setosa |
4.6 | 3.1 | 1.5 | 0.2 | setosa |
5.0 | 3.6 | 1.4 | 0.2 | setosa |
5.4 | 3.9 | 1.7 | 0.4 | setosa |
4.6 | 3.4 | 1.4 | 0.3 | setosa |
5.0 | 3.4 | 1.5 | 0.2 | setosa |
4.4 | 2.9 | 1.4 | 0.2 | setosa |
4.9 | 3.1 | 1.5 | 0.1 | setosa |
5.4 | 3.7 | 1.5 | 0.2 | setosa |
4.8 | 3.4 | 1.6 | 0.2 | setosa |
4.8 | 3.0 | 1.4 | 0.1 | setosa |
4.3 | 3.0 | 1.1 | 0.1 | setosa |
5.8 | 4.0 | 1.2 | 0.2 | setosa |
5.7 | 4.4 | 1.5 | 0.4 | setosa |
5.4 | 3.9 | 1.3 | 0.4 | setosa |
5.1 | 3.5 | 1.4 | 0.3 | setosa |
5.7 | 3.8 | 1.7 | 0.3 | setosa |
5.1 | 3.8 | 1.5 | 0.3 | setosa |
5.4 | 3.4 | 1.7 | 0.2 | setosa |
5.1 | 3.7 | 1.5 | 0.4 | setosa |
4.6 | 3.6 | 1.0 | 0.2 | setosa |
5.1 | 3.3 | 1.7 | 0.5 | setosa |
4.8 | 3.4 | 1.9 | 0.2 | setosa |
5.0 | 3.0 | 1.6 | 0.2 | setosa |
5.0 | 3.4 | 1.6 | 0.4 | setosa |
5.2 | 3.5 | 1.5 | 0.2 | setosa |
5.2 | 3.4 | 1.4 | 0.2 | setosa |
4.7 | 3.2 | 1.6 | 0.2 | setosa |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
6.9 | 3.2 | 5.7 | 2.3 | virginica |
5.6 | 2.8 | 4.9 | 2.0 | virginica |
7.7 | 2.8 | 6.7 | 2.0 | virginica |
6.3 | 2.7 | 4.9 | 1.8 | virginica |
6.7 | 3.3 | 5.7 | 2.1 | virginica |
7.2 | 3.2 | 6.0 | 1.8 | virginica |
6.2 | 2.8 | 4.8 | 1.8 | virginica |
6.1 | 3.0 | 4.9 | 1.8 | virginica |
6.4 | 2.8 | 5.6 | 2.1 | virginica |
7.2 | 3.0 | 5.8 | 1.6 | virginica |
7.4 | 2.8 | 6.1 | 1.9 | virginica |
7.9 | 3.8 | 6.4 | 2.0 | virginica |
6.4 | 2.8 | 5.6 | 2.2 | virginica |
6.3 | 2.8 | 5.1 | 1.5 | virginica |
6.1 | 2.6 | 5.6 | 1.4 | virginica |
7.7 | 3.0 | 6.1 | 2.3 | virginica |
6.3 | 3.4 | 5.6 | 2.4 | virginica |
6.4 | 3.1 | 5.5 | 1.8 | virginica |
6.0 | 3.0 | 4.8 | 1.8 | virginica |
6.9 | 3.1 | 5.4 | 2.1 | virginica |
6.7 | 3.1 | 5.6 | 2.4 | virginica |
6.9 | 3.1 | 5.1 | 2.3 | virginica |
5.8 | 2.7 | 5.1 | 1.9 | virginica |
6.8 | 3.2 | 5.9 | 2.3 | virginica |
6.7 | 3.3 | 5.7 | 2.5 | virginica |
6.7 | 3.0 | 5.2 | 2.3 | virginica |
6.3 | 2.5 | 5.0 | 1.9 | virginica |
6.5 | 3.0 | 5.2 | 2.0 | virginica |
6.2 | 3.4 | 5.4 | 2.3 | virginica |
5.9 | 3.0 | 5.1 | 1.8 | virginica |
# Python equivalent: df1.head(10)
# In R, show the first 10 rows:
head(x = df1, n = 10)
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species | |
---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <fct> | |
1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
7 | 4.6 | 3.4 | 1.4 | 0.3 | setosa |
8 | 5.0 | 3.4 | 1.5 | 0.2 | setosa |
9 | 4.4 | 2.9 | 1.4 | 0.2 | setosa |
10 | 4.9 | 3.1 | 1.5 | 0.1 | setosa |
# Show the last 5 rows.
tail(x = df1, n = 5)
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species | |
---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <fct> | |
146 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
147 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
148 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
149 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
150 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
# Python equivalent: df1.info()
# A Factor column is a categorical variable (a variable that has categories).
str(object = df1)
'data.frame': 150 obs. of 5 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Python equivalent: df1.describe()
summary(object = df1)
Sepal.Length Sepal.Width Petal.Length Petal.Width Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300 Median :5.800 Median :3.000 Median :4.350 Median :1.300 Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800 Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500 Species setosa :50 versicolor:50 virginica :50
# Python equivalent: df1.columns
names(df1)
# Python equivalents: df1['column_name'] or df1.column_name
df1$Petal.Width
# Python equivalent: df1[['column_name1', 'column_name2']],
# e.g. df1[['Sepal.Length', 'Sepal.Width']]
# In R the general form is df1[rows, columns]; leaving `rows` empty keeps
# every row.
head(
  df1[, c("Sepal.Length", "Sepal.Width")]
)
Sepal.Length | Sepal.Width | |
---|---|---|
<dbl> | <dbl> | |
1 | 5.1 | 3.5 |
2 | 4.9 | 3.0 |
3 | 4.7 | 3.2 |
4 | 4.6 | 3.1 |
5 | 5.0 | 3.6 |
6 | 5.4 | 3.9 |
# Three ways to build the sequence 1, 2, ..., 10.
1:10
c(1:10)
seq(from = 1, to = 10)
# Rows 1 to 6 of the two sepal columns.
df1[1:6, c("Sepal.Length", "Sepal.Width")]
Sepal.Length | Sepal.Width | |
---|---|---|
<dbl> | <dbl> | |
1 | 5.1 | 3.5 |
2 | 4.9 | 3.0 |
3 | 4.7 | 3.2 |
4 | 4.6 | 3.1 |
5 | 5.0 | 3.6 |
6 | 5.4 | 3.9 |
R: el 0 no se considera un número natural; los índices comienzan en 1.
Python: el 0 sí se considera natural; los índices comienzan en 0.
# In R, indexing starts at 1.
# Pick rows 1, 90 and 149 of the two sepal columns.
df1[c(1, 90, 149), c("Sepal.Length", "Sepal.Width")]
Sepal.Length | Sepal.Width | |
---|---|---|
<dbl> | <dbl> | |
1 | 5.1 | 3.5 |
90 | 5.5 | 2.5 |
149 | 6.2 | 3.4 |
# General form: df1[rows, columns]
# With a logical mask in the row position: df1[logical_mask, ]
# Keep every column of the rows whose Petal.Width is greater than 2.
df1[df1$Petal.Width > 2, ]
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species | |
---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <fct> | |
101 | 6.3 | 3.3 | 6.0 | 2.5 | virginica |
103 | 7.1 | 3.0 | 5.9 | 2.1 | virginica |
105 | 6.5 | 3.0 | 5.8 | 2.2 | virginica |
106 | 7.6 | 3.0 | 6.6 | 2.1 | virginica |
110 | 7.2 | 3.6 | 6.1 | 2.5 | virginica |
113 | 6.8 | 3.0 | 5.5 | 2.1 | virginica |
115 | 5.8 | 2.8 | 5.1 | 2.4 | virginica |
116 | 6.4 | 3.2 | 5.3 | 2.3 | virginica |
118 | 7.7 | 3.8 | 6.7 | 2.2 | virginica |
119 | 7.7 | 2.6 | 6.9 | 2.3 | virginica |
121 | 6.9 | 3.2 | 5.7 | 2.3 | virginica |
125 | 6.7 | 3.3 | 5.7 | 2.1 | virginica |
129 | 6.4 | 2.8 | 5.6 | 2.1 | virginica |
133 | 6.4 | 2.8 | 5.6 | 2.2 | virginica |
136 | 7.7 | 3.0 | 6.1 | 2.3 | virginica |
137 | 6.3 | 3.4 | 5.6 | 2.4 | virginica |
140 | 6.9 | 3.1 | 5.4 | 2.1 | virginica |
141 | 6.7 | 3.1 | 5.6 | 2.4 | virginica |
142 | 6.9 | 3.1 | 5.1 | 2.3 | virginica |
144 | 6.8 | 3.2 | 5.9 | 2.3 | virginica |
145 | 6.7 | 3.3 | 5.7 | 2.5 | virginica |
146 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
149 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
# Logical mask: petal width above 2 AND species equal to "virginica".
# `&` is the element-wise AND, so the mask has one value per row.
mascara <- (df1$Petal.Width > 2) & (df1$Species == "virginica")
# Keep only the rows where the mask is TRUE, with all columns.
df1[mascara, ]
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species | |
---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <fct> | |
101 | 6.3 | 3.3 | 6.0 | 2.5 | virginica |
103 | 7.1 | 3.0 | 5.9 | 2.1 | virginica |
105 | 6.5 | 3.0 | 5.8 | 2.2 | virginica |
106 | 7.6 | 3.0 | 6.6 | 2.1 | virginica |
110 | 7.2 | 3.6 | 6.1 | 2.5 | virginica |
113 | 6.8 | 3.0 | 5.5 | 2.1 | virginica |
115 | 5.8 | 2.8 | 5.1 | 2.4 | virginica |
116 | 6.4 | 3.2 | 5.3 | 2.3 | virginica |
118 | 7.7 | 3.8 | 6.7 | 2.2 | virginica |
119 | 7.7 | 2.6 | 6.9 | 2.3 | virginica |
121 | 6.9 | 3.2 | 5.7 | 2.3 | virginica |
125 | 6.7 | 3.3 | 5.7 | 2.1 | virginica |
129 | 6.4 | 2.8 | 5.6 | 2.1 | virginica |
133 | 6.4 | 2.8 | 5.6 | 2.2 | virginica |
136 | 7.7 | 3.0 | 6.1 | 2.3 | virginica |
137 | 6.3 | 3.4 | 5.6 | 2.4 | virginica |
140 | 6.9 | 3.1 | 5.4 | 2.1 | virginica |
141 | 6.7 | 3.1 | 5.6 | 2.4 | virginica |
142 | 6.9 | 3.1 | 5.1 | 2.3 | virginica |
144 | 6.8 | 3.2 | 5.9 | 2.3 | virginica |
145 | 6.7 | 3.3 | 5.7 | 2.5 | virginica |
146 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
149 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
# Show the first rows of df1 (head() defaults to 6 rows).
head(x = df1)
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species | |
---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <fct> | |
1 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
# Python equivalent: df1.iloc[0, 0]
# General form: df1[row, column]
# Access a single element: row 1, column 1.
df1[1, 1]
# Python equivalent: df1.iloc[0, 0] = 3.0
# Assignment uses the same positional indexing, just like iloc.
df1[1, 1] <- 3
# Read the cell back and show the first rows to confirm the change.
df1[1, 1]
head(x = df1)
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species | |
---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <fct> | |
1 | 3.0 | 3.5 | 1.4 | 0.2 | setosa |
2 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
3 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
4 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
5 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
6 | 5.4 | 3.9 | 1.7 | 0.4 | setosa |
# Build two vectors of equal length:
# one with the names ...
nombre <- c("Ana", "Luna", "Fernando")
# ... and one with the ages.
edad <- c(32, 17, 40)
# Combine them with data.frame(); each named argument becomes a column.
df_new <- data.frame(Nombres = nombre, Edades = edad)
df_new
Nombres | Edades |
---|---|
<chr> | <dbl> |
Ana | 32 |
Luna | 17 |
Fernando | 40 |
# Add a new column by assigning a vector with one value per row.
df_new[["Col.Nueva"]] <- c(1, 2, 3)
df_new
Nombres | Edades | Col.Nueva |
---|---|---|
<chr> | <dbl> | <dbl> |
Ana | 32 | 1 |
Luna | 17 | 2 |
Fernando | 40 | 3 |
# Values for one extra row (length-one vectors).
nombre <- "Ana2"
edad <- 32
colnueva <- 24
# Build a one-row data frame with the same column names as df_new.
df_new2 <- data.frame(Nombres = nombre, Edades = edad, Col.Nueva = colnueva)
# Stack the new row below the existing rows. Note that the result is stored
# in df_New (capital N), a different object from df_new.
df_New <- rbind(df_new, df_new2)
df_New
Nombres | Edades | Col.Nueva |
---|---|---|
<chr> | <dbl> | <dbl> |
Ana | 32 | 1 |
Luna | 17 | 2 |
Fernando | 40 | 3 |
Ana2 | 32 | 24 |
Recordemos cómo se define una función en Python:
def <nombre funcion>(<nombre parametros>):
    # instrucciones
    return <lo que regresa la funcion>
En R, la definición equivalente es:
<nombre funcion> <- function(<nombres parametros>){
    # instrucciones
    return(<lo que regresa la funcion>)
}
# Square function.
#
# x: value to square; this file calls it with a single number and with a
#    numeric vector.
# Returns x raised to the power 2 (element-wise for a vector).
f <- function(x) {
  x^2
}
# Evaluate f at 4.
f(4)
# plot(): basic scatter plot of petal width against petal length.
plot(x = df1$Petal.Length, y = df1$Petal.Width)
# Same plot with red, solid (pch = 19) and enlarged (cex = 1.5) points.
plot(
  x = df1$Petal.Length, y = df1$Petal.Width,
  col = "red", pch = 19, cex = 1.5
)
# Same plot again, colouring the points by species and adding a title.
plot(
  x = df1$Petal.Length, y = df1$Petal.Width,
  col = df1$Species,
  pch = 19, # solid points
  main = "Diagrama de dispersión" # title
)
# Scatter plot of petal width against petal length, coloured by species.
# A factor passed to `col` is used through its integer codes (1, 2, 3), which
# index the current palette().
plot(
  x = df1$Petal.Length, y = df1$Petal.Width,
  col = df1$Species,
  pch = 19, # solid points
  main = "Diagrama de dispersión" # title
)
# Legend in the top-left corner. The fill colours are given as the same
# palette indices the plot uses (one per factor level) instead of hard-coded
# colour names: since R 4.0.0 the default palette no longer contains pure
# "red" and "green", so named colours would not match the plotted points.
legend(
  x = "topleft",
  legend = c("Set", "Versi", "Virgi"), # labels, in factor-level order
  fill = seq_len(nlevels(df1$Species))
)
# Add a reference grid to the plot.
grid()
# Evaluation points: from -5 to 5 in steps of 0.002.
rango <- seq(from = -5, to = 5, by = 0.002)
# Plot f over those points using solid points.
plot(
  x = rango, y = f(rango),
  main = "Gráfico de una función", pch = 19
)
# Add a reference grid to the plot.
grid()
# Cube function.
#
# x: value to cube; this file calls it with a numeric vector.
# Returns x raised to the power 3 (element-wise for a vector).
f3 <- function(x) {
  x^3
}
# Plot the cubic function over `rango` with solid red points.
plot(
  x = rango, y = f3(rango),
  main = "función cúbica", pch = 19, col = "red"
)
# Add a reference grid to the plot.
grid()