# ============================================================================
# Exercise 2: Inspecting Data - SOLUTION
# Module: Data Importing, Tidying and Writing
# ============================================================================

# INSTRUCTIONS:
# This is the completed solution: every blank (___) from the exercise script
# has been filled in following the step-by-step guidance. Run each section
# and compare the output with your own work!

# NOTE: This exercise assumes you have completed Exercise 1 and have the
# following datasets loaded: dt_firms, panel_vat, and panel_cit
# If you don't have them loaded, re-run Exercise 1 first!

# ============================================================================
# PART 1: Load Required Packages
# ============================================================================

# Step 1: Load the dplyr and data.table packages
# TODO: Load both packages
library(dplyr)
library(data.table)


# ============================================================================
# PART 2: Inspect dt_firms
# ============================================================================

# Step 2: Report how many rows and columns dt_firms has
# TODO: Use dim() to see dimensions
dim(dt_firms)  # first value = rows, second value = columns

# Step 3: Look at the structure of dt_firms with glimpse()
# HINT: glimpse() is dplyr's alternative to str() for viewing structure
# TODO: Use glimpse() from dplyr
dplyr::glimpse(dt_firms)

# Step 4: Produce summary statistics for the columns of dt_firms
# TODO: Use summary() to get statistics
summary(dt_firms)


# ============================================================================
# PART 3: Inspect panel_vat
# ============================================================================

# Step 5: Preview the first 10 rows of panel_vat
# TODO: Use head() with n = 10
head(panel_vat, n = 10)

# Step 6: Count how many distinct firms appear in panel_vat
# HINT: unique() drops repeated firm_id values; length() counts what is left
# TODO: Count unique values in firm_id
length(unique(panel_vat[["firm_id"]]))

# Step 7: List the column names of panel_vat
# TODO: Use names() or colnames()
colnames(panel_vat)


# ============================================================================
# PART 4: Inspect panel_cit
# ============================================================================

# Step 8: Display the last 5 rows of panel_cit
# TODO: Use tail() with n = 5
tail(panel_cit, n = 5)

# Step 9: Check if there are any missing values in panel_cit using summary()
# HINT: summary() adds an "NA's" entry for numeric, logical, factor and date
# columns that contain missing values. Character columns are only summarised
# as Length/Class/Mode, so their missing values do not show up there.
# TODO: Use summary()
summary(panel_cit)

# Count the NAs in every column directly, whatever its type, so that missing
# values in character columns are not overlooked.
colSums(is.na(panel_cit))


# ============================================================================
# REFLECTION QUESTIONS (no code needed, just think about these)
# ============================================================================

# Question 1: Which dataset has the most rows?
# Question 2: Which dataset has the most columns?
# Question 3: Did you find any missing values? In which dataset and variables?


# ============================================================================
# CONGRATULATIONS!
# You've completed Exercise 2. Make sure all your code runs without errors.
# Compare your own results with this solution script when you're done.
# ============================================================================
