# Bind three values of different types (str, int, float) in one statement
name, age, height = "Juan", 25, 1.75

# print() writes its arguments separated by a single space
print("Name:", name, "Age:", age, "Height:", height)

# The same values interpolated into an f-string
print(f"Name: {name}, Age: {age}, Height: {height} meters")

Name: Juan Age: 25 Height: 1.75
Name: Juan, Age: 25, Height: 1.75 meters

print(name, age, height)
print(name)

Juan 25 1.75
Juan

# Mathematical Operations
a = 10
b = 3

# NOTE(review): the name `sum` shadows Python's built-in sum() for the rest of
# the session. It is kept here because a later cell prints `sum`.
sum = a + b
subtraction = a - b
multiplication = a * b
division = a / b  # `/` is true division, so the result is a float

# Print the results
print(f"Addition: {sum}, Subtraction: {subtraction}, Multiplication: {multiplication}, Division: {division}")

Addition: 13, Subtraction: 7, Multiplication: 30, Division: 3.3333333333333335

print(sum, subtraction, multiplication, division)

13 7 30 3.3333333333333335

10-3

7

23*45

1035

30/3

10.0

5<=2

False

3-2

1

# This is a comment, Python ignores everything after the symbol #
print('Hello, world!')

Hello, world!

statement = "we are learning Python for Business"
print(statement)

we are learning Python for Business

# Defining a list in Python
my_list = [1, 2, 3, 4, 5]
print(my_list)

[1, 2, 3, 4, 5]

# A list can contain different types of data
another_list = [1, "two", 3.0, True]
print(another_list)

[1, 'two', 3.0, True]

# Access list elements by index (starts at 0)
print(my_list[0]) # Prints 1
print(another_list[1]) # Prints "two"

1
two

# Modify list elements
my_list[2] = 10
print(my_list) # Prints [1, 2, 10, 4, 5]

[1, 2, 10, 4, 5]

# Add items to the list
my_list.append(6)
print(my_list) # Prints [1, 2, 10, 4, 5, 6]

[1, 2, 10, 4, 5, 6]

# Remove items from the list
my_list.remove(4)
print(my_list) # Prints [1, 2, 10, 5, 6]

[1, 2, 10, 5, 6]

# Get the length of the list
len(my_list) # Prints 5

5

numbers = [10, 20, 30, 40]
numbers.insert(1, 15) # This will insert 15 at index 1
print(numbers)

[10, 15, 20, 30, 40]

numbers = [10, 20, 30, 40]
removed = numbers.pop(2) # This will remove the element at index 2 (30)
print(numbers)
print("Element removed:", removed)

[10, 20, 40]
Element removed: 30

# Named `values` rather than `list` so the built-in list type is not shadowed
values = [10, 20, 30, 40, 50]
# Slice from index 1 up to, but not including, index 4
cut = values[1:4]
print(cut)

[20, 30, 40]

numbers = [10, 20, 30, 40]
numbers[1:3] = [25, 35] # Replace the elements at positions 1 and 2 with new values
print(numbers)

[10, 25, 35, 40]

# Named `values` rather than `list` so the built-in list type is not shadowed
values = [10, 20, 30, 40, 50]

# Slice from index 1 to 4 (not including 4).
# Stored as `slice_middle` so the built-in slice() is not shadowed either.
slice_middle = values[1:4]  # Gets [20, 30, 40]
print(slice_middle)

# Omit start: begin at the first element
slice_start = values[:3]  # Gets [10, 20, 30]
print(slice_start)

# Omit stop: go to the end of the list
slice_stop = values[2:]  # Gets [30, 40, 50]
print(slice_stop)

# Omit both start and stop: get a copy of the entire list
slice_full = values[:]  # Gets [10, 20, 30, 40, 50]
print(slice_full)

[20, 30, 40]
[10, 20, 30]
[30, 40, 50]
[10, 20, 30, 40, 50]

# Named `values` rather than `list` so the built-in list type is not shadowed
values = [10, 20, 30, 40, 50]

# Negative indices count from the end of the list
last = values[-1]  # Gets 50
penultimate = values[-2]  # Gets 40
print(last, penultimate)

50 40

# Named `values` rather than `list` so the built-in list type is not shadowed
values = [10, 20, 30, 40, 50, 60, 70]

# A step of 4 takes every fourth element, starting with the first
slice_step = values[::4]
print(slice_step)

[10, 50]

# Named `values` rather than `list` so the built-in list type is not shadowed
values = [10, 20, 30, 60, 50]

# A step of -1 walks the list backwards, producing a reversed copy
reverse_list = values[::-1]
print(reverse_list)

[50, 60, 30, 20, 10]

my_list = [3, 1, 4, 1, 5, 9, 2, 6]

# Sort the list in ascending order
my_list.sort()
print(f"List sorted ascending: {my_list}")

# Sort the list in descending order
my_list.sort(reverse=True)
print(f"List sorted descending: {my_list}")

List sorted ascending: [1, 1, 2, 3, 4, 5, 6, 9]
List sorted descending: [9, 6, 5, 4, 3, 2, 1, 1]

strings = ["apple", "banana", "kiwi", "orange", "grape"]

# Sort strings by length using the len function as the key
strings.sort(key=len)

strings

['kiwi', 'apple', 'grape', 'banana', 'orange']

# Tuple: Ordered, immutable, allows duplicate members.
my_tuple = (1, 2, 3, "four")
print("Tuple:", my_tuple)

# Set: Unordered, mutable, no duplicate members (the repeated 2 is stored once).
my_set = {1, 2, 3, 2, 4}
print("Set:", my_set)

# Dictionary: Mutable key-value pairs, keys must be unique; keeps insertion order (Python 3.7+).
my_dict = {"name": "Alice", "age": 30, "city": "New York"}
print("Dictionary:", my_dict)

Tuple: (1, 2, 3, 'four')
Set: {1, 2, 3, 4}
Dictionary: {'name': 'Alice', 'age': 30, 'city': 'New York'}

number = 5

# Pick the message that matches the sign of the number, then print it once
if number > 0:
    message = "The number is positive."
elif number < 0:
    message = "The number is negative."
else:
    message = "The number is zero."

print(message)

The number is positive.

# Conditional Example
age = 18

# A conditional expression selects which of the two messages is printed
print("You are an adult." if age >= 18 else "You are a minor.")

You are an adult.

numbers = [1, 2, 3, 4, 5]
# Accumulate into `total` rather than `sum`, so the built-in sum() is not shadowed
total = 0

# Add each element of the list to the running total
for num in numbers:
    total += num

print("The sum is:", total)

The sum is: 15

fruits = ["apple", "banana", "cherry", "grape"]

# For loop to iterate over a list
for fruit in fruits:
  print(fruit)

apple
banana
cherry
grape

counter = 0

while counter < 5:
  print("Counter:", counter)
  counter += 1

Counter: 0
Counter: 1
Counter: 2
Counter: 3
Counter: 4

import pandas as pd

# Adjacent string literals are joined at compile time, so the long URL can be
# split across lines without a backslash inside the string.
data_tips = pd.read_csv(
    "https://raw.githubusercontent.com/mwaskom/"
    "seaborn-data/master/tips.csv"
)

data_tips.head()

data_tips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   total_bill  244 non-null    float64
 1   tip         244 non-null    float64
 2   sex         244 non-null    object 
 3   smoker      244 non-null    object 
 4   day         244 non-null    object 
 5   time        244 non-null    object 
 6   size        244 non-null    int64  
dtypes: float64(2), int64(1), object(4)
memory usage: 13.5+ KB

data_tips.describe()

data_tips.to_csv('table_tips.csv', index=False)
# index=False keeps the DataFrame index out of the saved file
# Comments, which begin with the '#' symbol, are not executed

from google.colab import files
files.download('table_tips.csv')

data_tips.to_excel('table_tips.xlsx', index=False)

files.download('table_tips.xlsx')

uploaded = files.upload()

Saving table_tips.xlsx to table_tips (1).xlsx

tips = pd.read_excel("table_tips.xlsx")
tips.head()

uploaded = files.upload()

Saving table_tips.csv to table_tips (2).csv

propinas = pd.read_csv("table_tips.csv")
propinas.head()

tips = pd.read_table("table_tips.csv", sep=',')
tips.head()

tips = pd.read_csv("table_tips.csv", header=None)
tips.head()

import numpy as np

columns_names = ['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']

# Adjacent string literals are joined at compile time, so the long URL can be
# split across lines without a backslash inside the string.
data_url = (
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/"
    "tips.csv"
)

# Load the CSV into a NumPy structured array: skip the header row, name the
# fields from columns_names, and let dtype=None choose a type for each column.
data_tips = np.genfromtxt(
    data_url,
    delimiter=',',
    skip_header=1,
    dtype=None,
    names=columns_names,
    encoding='utf-8',
)

# Show the first five records
print(data_tips[:5])

[(16.99, 1.01, '"Female"', '"No"', '"Sun"', '"Dinner"', 2)
 (10.34, 1.66, '"Male"', '"No"', '"Sun"', '"Dinner"', 3)
 (21.01, 3.5 , '"Male"', '"No"', '"Sun"', '"Dinner"', 3)
 (23.68, 3.31, '"Male"', '"No"', '"Sun"', '"Dinner"', 2)
 (24.59, 3.61, '"Female"', '"No"', '"Sun"', '"Dinner"', 4)]

for column in data_tips.dtype.names:
  print(f"Column: {column}, Type: {data_tips.dtype[column]}")

Column: total_bill, Type: float64
Column: tip, Type: float64
Column: sex, Type: <U8
Column: smoker, Type: <U5
Column: day, Type: <U6
Column: time, Type: <U8
Column: size, Type: int64

# Collect the three numeric fields and transpose, so that each record becomes
# a row and each field a column of a plain 2-D array.
numeric_fields = ('total_bill', 'tip', 'size')
data_tips_numericos = np.array([data_tips[field] for field in numeric_fields]).T

# First ten rows, all columns
print(data_tips_numericos[:10, :])

[[16.99  1.01  2.  ]
 [10.34  1.66  3.  ]
 [21.01  3.5   3.  ]
 [23.68  3.31  2.  ]
 [24.59  3.61  4.  ]
 [25.29  4.71  4.  ]
 [ 8.77  2.    2.  ]
 [26.88  3.12  4.  ]
 [15.04  1.96  2.  ]
 [14.78  3.23  2.  ]]

np.count_nonzero(data_tips_numericos, axis=0)
# axis=0 counts down the rows, giving one result per column
# use axis=1 to count along the columns instead, giving one result per row

array([244, 244, 244])

np.mean(data_tips_numericos, axis=0)

array([19.78594262,  2.99827869,  2.56967213])

# NOTE: np.std uses ddof=0 by default (population standard deviation), whereas
# pandas' describe() reports the sample standard deviation (ddof=1), so the two
# differ slightly; pass ddof=1 here to reproduce the pandas figures.
np.std(data_tips_numericos, axis=0)

array([8.88415058, 1.38079995, 0.94914883])

np.min(data_tips_numericos, axis=0)

array([3.07, 1.  , 1.  ])

np.max(data_tips_numericos, axis=0)

array([50.81, 10.  ,  6.  ])

# Median (50th percentile) of each column.
# No trailing comma: a trailing comma would wrap the result in a 1-tuple.
np.median(data_tips_numericos, axis=0)

array([17.795,  2.9  ,  2.   ])

# First quartile (25th percentile) of each column
np.percentile(data_tips_numericos, 25, axis=0)

array([13.3475,  2.    ,  2.    ])

# Third quartile (75th percentile) of each column
np.percentile(data_tips_numericos, 75, axis=0)

array([24.1275,  3.5625,  3.    ])

averages = np.mean(data_tips_numericos, axis=0)
print(averages)

[19.78594262  2.99827869  2.56967213]

np.savetxt('average_results.csv', averages, delimiter=',', fmt='%s')

files.download('average_results.csv')

uploaded = files.upload()

Saving average_results.csv to average_results (1).csv

data = np.genfromtxt('average_results.csv', delimiter=',')
print(data)

[19.78594262  2.99827869  2.56967213]

import matplotlib.pyplot as plt
import seaborn as sns

# Histogram of check totals
plt.figure(figsize=(8, 5)) #inches
sns.histplot(data_tips['total_bill'], bins=20, kde=True)
plt.title("Check Total Distribution")
plt.xlabel("Total Bill")
plt.ylabel("Frequency")
plt.show()

plt.figure(figsize=(8, 5))
sns.scatterplot(x=data_tips['total_bill'], y=data_tips['tip'],\
                hue=data_tips['sex'])
plt.title("Account Total vs Tip")
plt.xlabel("Total Bill")
plt.ylabel("Tip")
plt.show()

plt.figure(figsize=(8, 5))
sns.scatterplot(x=data_tips['total_bill'], y=data_tips['tip'],\
                hue=data_tips['sex'])
plt.title("Account Total vs Tip")
plt.xlabel("Total Bill")
plt.ylabel("Tip")
plt.savefig("tip_graph.png")
plt.show()

files.download("tip_graph.png")

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

personal_folder='/content/drive/MyDrive/Business_Statistics_2025Q3/Workshop/Notebooks/'

import os
os.chdir(personal_folder)

os.getcwd()

'/content/drive/MyDrive/Business_Statistics_2025Q3/Workshop/Notebooks'

import pandas as pd
datos_gasolina = pd.read_csv('GASREGW.csv')
print(datos_gasolina)

     observation_date  GASREGW
0          1990-08-20    1.191
1          1990-08-27    1.245
2          1990-09-03    1.242
3          1990-09-10    1.252
4          1990-09-17    1.266
...               ...      ...
1816       2025-06-09    3.108
1817       2025-06-16    3.139
1818       2025-06-23    3.213
1819       2025-06-30    3.164
1820       2025-07-07    3.125

[1821 rows x 2 columns]

import matplotlib.pyplot as plt

# Keep every 50th observation so the markers do not crowd the chart
sampled_dates = datos_gasolina["observation_date"][::50]
sampled_prices = datos_gasolina["GASREGW"][::50]

# Line plot of the sampled prices
plt.figure(figsize=(8, 4))
plt.plot(
    sampled_dates,
    sampled_prices,
    marker='o',
    linestyle='-',
    color='b',
    label="Gas Price",
)

# Axis labels, title, legend and grid
plt.xlabel("Date")
plt.ylabel("Gas Price ($)")
plt.title("Gas Prices Over Time")
plt.legend()
plt.grid(True)

# Tilt the x-axis labels so they do not overlap
plt.xticks(rotation=45)

# Save to disk before show()
plt.savefig('gasoline.png', dpi=300)
plt.show()

ls

'Análisis Descriptivo y Visualización de Datos.ipynb'
 gasoline.png
 GASREGW.csv
 Introducción_al_Análisis_Predictivo.ipynb
'Introduction to Python and Data Analytics Tools.ipynb'
'Manipulación de Datos con Pandas.ipynb'

Column	Description
`total_bill`	Total bill in dollars.
`tip`	Tip in dollars.
`sex`	Sex of the bill holder (`Male` or `Female`).
`smoker`	Indicates whether there are smokers in the group (`Yes` or `No`).
`day`	Day of the week (`Thur`, `Fri`, `Sat`, `Sun`).
`time`	Time of day (`Lunch` or `Dinner`).
`size`	Size of the group of diners.

	total_bill	tip	size
count	244.000000	244.000000	244.000000
mean	19.785943	2.998279	2.569672
std	8.902412	1.383638	0.951100
min	3.070000	1.000000	1.000000
25%	13.347500	2.000000	2.000000
50%	17.795000	2.900000	2.000000
75%	24.127500	3.562500	3.000000
max	50.810000	10.000000	6.000000

Course: Introduction to Data Analytics with Python.

Instructor: Dr. Andrés García Medina

Email: andgarm.n@gmail.com

Site: https://sites.google.com/view/andresgmen/home

Module 1: Introduction to Python and Tools for Data Analytics

Content

Software

1: Basic Python Syntax

Conditionals and Loops in Python (Advanced)

Recap of section 1

2: Guide Example: Restaurant Tip Data

2.1. Explore Data with $\mathtt{Pandas}$

2.2. Explore data with $\mathtt{NumPy}$ (advanced)

2.3. Explore data with $\mathtt{matplotlib}$

Recap of section 2

3: Upload and open a file from Google Drive

Example

Recap of section 3

General Summary

References

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3
3	23.68	3.31	Male	No	Sun	Dinner	2
4	24.59	3.61	Female	No	Sun	Dinner	4

	0	1	2	3	4	5	6
0	total_bill	tip	sex	smoker	day	time	size
1	16.99	1.01	Female	No	Sun	Dinner	2
2	10.34	1.66	Male	No	Sun	Dinner	3
3	21.01	3.5	Male	No	Sun	Dinner	3
4	23.68	3.31	Male	No	Sun	Dinner	2

Course: Introduction to Data Analytics with Python.

Instructor: Dr. Andrés García Medina

Email: andgarm.n@gmail.com

Site: https://sites.google.com/view/andresgmen/home

Module 1: Introduction to Python and Tools for Data Analytics

Content

Software

1: Basic Python Syntax

Conditionals and Loops in Python (Advanced)

Recap of section 1

2: Guide Example: Restaurant Tip Data

2.1. Explore Data with $\mathtt{Pandas}$

2.2. Explore data with $\mathtt{NumPy}$ (advanced)

2.3. Explore data with $\mathtt{matplotlib}$

Recap of section 2

3: Upload and open a file from Google Drive

Example

Recap of section 3

General Summary

References

Site: https://sites.google.com/view/andresgmen/home