# Silence warnings so that the output of the cells below stays uncluttered.
# NOTE(review): 'ignore' with no further arguments hides every warning
# category, including deprecation notices from the libraries used here.
import warnings
warnings.filterwarnings('ignore')


# Realizamos la instalacion de las siguientes librerias
!pip install faker
!pip install Dumper

Collecting faker
  Downloading Faker-15.3.3-py3-none-any.whl (1.6 MB)
Requirement already satisfied: python-dateutil>=2.4 in c:\programdata\miniconda3\lib\site-packages (from faker) (2.8.2)
Requirement already satisfied: six>=1.5 in c:\programdata\miniconda3\lib\site-packages (from python-dateutil>=2.4->faker) (1.16.0)
Installing collected packages: faker
Successfully installed faker-15.3.3
Collecting Dumper
  Downloading Dumper-1.2.0-py2.py3-none-any.whl (13 kB)
Installing collected packages: Dumper
Successfully installed Dumper-1.2.0


# Faker is the library used to generate the fake data below
from faker import Faker
# Create the generator instance reused by the following cells
fake = Faker()

# Print one randomly generated value for each of these fields, in order:
# name, address, email, a short block of text, and country.
for generar in (fake.name, fake.address, fake.email, fake.text, fake.country):
    print(generar())

Jessica Brown
1743 Joseph Lodge
Lake Johnstad, IL 69208
barkersarah@example.com
This a foreign speak heavy.
Statement focus together fill store ago media. Woman pretty table sister radio care choose.
Vanuatu


# Second draw of the same fields; each value is printed on its own line.
print(
    fake.name(),
    fake.address(),
    fake.email(),
    fake.text(),
    fake.country(),
    sep='\n',
)

Gary Turner
9460 Wilson Vista Suite 386
East Sergio, MP 58762
dallen@example.net
Agree development edge same whose. Physical kind eye event. Lead leg wait arrive who left this thing.
Gambia


# Random integer between 0 and 100; random_int includes both endpoints,
# so 100 itself can be returned.
fake.random_int(0, 100)

66


# NumPy can be used to draw random decimal (floating-point) numbers;
# each np.random.rand() call returns one float in [0, 1).
import numpy as np

print(np.random.rand())
print(np.random.rand())

0.504182924195679
0.27661491275548944


import pandas as pd

# Number of synthetic records to generate; shared by every column so that
# all of them are guaranteed to have the same length.
n_registros = 10000

# Full names: Faker's name() followed by a second surname.  The explicit
# space keeps the two parts from being glued together (e.g. "RiceCruz").
nombres = [f"{fake.name()} {fake.last_name()}" for _ in range(n_registros)]
# Email addresses
correos = [fake.email() for _ in range(n_registros)]
# Ages between 18 and 80 (random_int includes both endpoints)
edades = [fake.random_int(18, 80) for _ in range(n_registros)]
# Salaries: random floats in [0, 10000)
salarios = [np.random.rand() * 10000 for _ in range(n_registros)]

# Gather the columns in a dictionary keyed by column name
dict_info = {
    'Nombre': nombres,
    'Edad': edades,
    'Correo': correos,
    'Salario': salarios,
}
# Build the DataFrame and preview its first rows
df = pd.DataFrame(dict_info)
df.head()


# Preview the last rows of the DataFrame
df.tail()


# shape is (rows, columns): 10000 records and 4 columns
df.shape

(10000, 4)


# Inspect the index attribute of the DataFrame
df.index

RangeIndex(start=0, stop=10000, step=1)


# Draw exactly 10000 distinct five-digit IDs.  Sampling without replacement
# guarantees both uniqueness and the count; drawing with replacement and
# deduplicating through a set only yields enough unique values by chance.
ids_unicos = np.random.choice(np.arange(10000, 100000), size=10000, replace=False)
# Keep the result as a set of plain Python ints
ID = set(ids_unicos.tolist())
len(ID)

10342


# Convert the set to a list so that it can be sliced
ID = list(ID)
# Keep only the first 10000 IDs, one per row of df
ID = ID[0:10000]
print(len(ID))
# Show a sample of the generated IDs
print(ID[:20])

10000
[65538, 98311, 32783, 32785, 98327, 32796, 65566, 65571, 98342, 65575, 32808, 65578, 98346, 65581, 65583, 65589, 65590, 32823, 98363, 65596]


# Add the generated IDs as a new column
df['ID'] = ID
df.head()


# Number of distinct IDs; it should equal the number of rows
df.ID.unique().shape

(10000,)


# drop=True removes the ID column from the data while using it as the
# new index of the DataFrame.
# The result is not assigned here: set_index returns a new DataFrame
# and leaves df itself unchanged.
df.set_index('ID', drop=True)


# df still has its original index at this point
df.head()


# Reassign df so that ID is actually kept as the index
df = df.set_index('ID', drop=True)
df.head()


# reset_index turns ID back into a regular column and restores the
# default integer index
df = df.reset_index()
df.head()


# Passing a list of columns builds a multi-level index (ID, Nombre);
# the result is only displayed, not assigned
df.set_index(['ID', 'Nombre'], drop=True)


# Use ID as the index again for the label-based lookups below
df = df.set_index('ID', drop=True)
df.head()


# Look up the person whose ID is 98327 (loc selects by index label)
df.loc[98327]

Nombre           Regina RiceCruz
Edad                          24
Correo     ccaldwell@example.net
Salario              8305.623411
Name: 98327, dtype: object


# The following would raise a KeyError because that label does not exist
# in the index of the DataFrame:
# df.loc[100000]


# Look up the person whose ID is 32785
df.loc[32785]

Nombre       Heather SmithRoss
Edad                        59
Correo     karen37@example.com
Salario            3811.456078
Name: 32785, dtype: object


# Selecting a single row with loc returns a Series
type(df.loc[32785])

pandas.core.series.Series


# Select every value of the Nombre column
df.loc[:, 'Nombre']

ID
65538       Lisa BradleyTodd
98311      Robert KirbyLewis
32783     Linda HuynhCabrera
32785      Heather SmithRoss
98327        Regina RiceCruz
                ...         
97176     James BennettLyons
31644      Julie WalshMoreno
97182    Zachary ReyesMartin
97185    Kelly KennedyPalmer
31651    Kathryn CollierCook
Name: Nombre, Length: 10000, dtype: object


# Select every value of the Nombre and Edad columns
df.loc[:, ['Nombre', 'Edad'] ]


# Select every value of the Edad column
df.loc[:, 'Edad']

ID
65538    68
98311    55
32783    41
32785    59
98327    24
         ..
97176    80
31644    64
97182    54
97185    20
31651    40
Name: Edad, Length: 10000, dtype: int64


# Keep only the rows where Edad is at least 50 (boolean mask)
df[df.Edad >= 50]


# The comparison on its own yields a boolean Series, one value per row
df.loc[:, 'Edad'] >= 50

ID
65538     True
98311     True
32783    False
32785     True
98327    False
         ...  
97176     True
31644     True
97182     True
97185    False
31651    False
Name: Edad, Length: 10000, dtype: bool


# Filter the rows with Edad >= 50, building the boolean mask through loc
df[df.loc[:, 'Edad'] >= 50]


# Go back to the default integer index; ID becomes a column again
df = df.reset_index()
df.head()


# With the default integer index, loc[4] selects the row labelled 4
df.loc[4]

ID                         98327
Nombre           Regina RiceCruz
Edad                          24
Correo     ccaldwell@example.net
Salario              8305.623411
Name: 4, dtype: object


# Row labelled 7000
df.loc[7000]

ID                       23083
Nombre         Tanya MannKnapp
Edad                        59
Correo     ularson@example.com
Salario            7462.090363
Name: 7000, dtype: object


# Rows with index labels 5 through 10; loc slices include both endpoints
df.loc[5:10]


# Rows with index labels 5 through 10 (both included), keeping only
# the Correo column
df.loc[5:10, 'Correo']

5         hhebert@example.org
6     aliciajones@example.net
7       kathryn72@example.org
8         ugarcia@example.com
9      danielcole@example.org
10      katelyn42@example.com
Name: Correo, dtype: object


# Rows with index labels 5 through 10 (both included), keeping only
# the Correo and Salario columns
df.loc[5:10, ['Correo', 'Salario']]


# Use ID as the index again before the position-based (iloc) examples
df = df.set_index('ID', drop=True)
df.head()


# First row; iloc selects by position (starting at 0), not by index label
df.iloc[0]

Nombre              Lisa BradleyTodd
Edad                              68
Correo     williamsjames@example.org
Salario                  7606.614336
Name: 65538, dtype: object


# Row at position 10; positions start at 0, so this is the eleventh row
df.iloc[10]

Nombre      Robert JacobsonOrtiz
Edad                          64
Correo     katelyn42@example.com
Salario              4723.157451
Name: 32808, dtype: object


# Rows at positions 5 through 9; iloc slices exclude the end position
df.iloc[5:10]


# Restore the default integer index; ID becomes the first column
df = df.reset_index()
df.head()


# Rows at positions 5 through 9 (the end position 10 is excluded)
df.iloc[5:10]


# Five rows (positions 5-9), only the first column (ID)
df.iloc[5:10, 0]

5    32796
6    65566
7    65571
8    98342
9    65575
Name: ID, dtype: int64


# Five rows (positions 5-9), first and second columns (ID and Nombre),
# selected with a column slice
df.iloc[5:10, 0:2]


# Alternatively, with an explicit list of column positions:
# five rows (positions 5-9), first and second columns (ID and Nombre)
df.iloc[5:10, [0, 1]]


# Five rows (positions 5-9), first and third columns (ID and Edad);
# the slice 0:3:2 steps by two, yielding column positions 0 and 2
df.iloc[5:10, 0:3:2]


# Alternatively, with an explicit list of column positions:
# five rows (positions 5-9), first and third columns (ID and Edad)
df.iloc[5:10, [0, 2]]

	Nombre	Edad	Correo	Salario
9995	Jennifer CoxNelson	50	ogreene@example.net	9297.110391
9996	Travis PottsRogers	47	danielschristopher@example.com	1352.001969
9997	Jonathan Simmons Jr.Davis	33	williewilliams@example.org	9015.793643
9998	Kathryn RosalesRussell	61	james23@example.net	7281.333227
9999	Daniel KramerSmith	64	gordonchristopher@example.com	222.855406

Curso de introducción a la programación con Python

Contenido

Índice en un dataframe

Librería faker

loc e iloc

	Nombre	Edad	Correo	Salario
0	Lisa BradleyTodd	68	williamsjames@example.org	7606.614336
1	Robert KirbyLewis	55	rebeccarhodes@example.com	4210.782545
2	Linda HuynhCabrera	41	nathanshaffer@example.org	9609.605572
3	Heather SmithRoss	59	karen37@example.com	3811.456078
4	Regina RiceCruz	24	ccaldwell@example.net	8305.623411

	Nombre	Edad	Correo	Salario
ID
65538	Lisa BradleyTodd	68	williamsjames@example.org	7606.614336
98311	Robert KirbyLewis	55	rebeccarhodes@example.com	4210.782545
32783	Linda HuynhCabrera	41	nathanshaffer@example.org	9609.605572
32785	Heather SmithRoss	59	karen37@example.com	3811.456078
98327	Regina RiceCruz	24	ccaldwell@example.net	8305.623411
...	...	...	...	...
97176	James BennettLyons	80	vargasstacey@example.org	6703.123603
31644	Julie WalshMoreno	64	cynthiavargas@example.net	9039.089244
97182	Zachary ReyesMartin	54	fwashington@example.net	4490.081988
97185	Kelly KennedyPalmer	20	kshaw@example.com	1548.094240
31651	Kathryn CollierCook	40	dianafields@example.org	3810.610008

	ID	Nombre	Edad	Correo	Salario
5	32796	Deanna WestCraig	48	hhebert@example.org	1268.823293
6	65566	Catherine MortonSchroeder	74	aliciajones@example.net	5065.745673
7	65571	Daniel SolisSmith	72	kathryn72@example.org	852.137703
8	98342	Marisa BishopFerguson	54	ugarcia@example.com	4800.799213
9	65575	David CarpenterWilson	53	danielcole@example.org	6036.172515
10	32808	Robert JacobsonOrtiz	64	katelyn42@example.com	4723.157451