# Exercise 1: Core dplyr Operations
# Module 3: Data Wrangling with dplyr
# ============================================================================

# Load required packages
library(dplyr)
library(data.table)
library(here)
library(lubridate)

# Load the VAT and CIT panels from the Intermediate data folder.
# The path is supplied through `file =` so fread() always reads it as a file
# on disk. fread() accepts only one of input / file / text / cmd per call, so
# no `cmd` argument is passed here.
panel_vat <- fread(file = here("Data", "Intermediate", "panel_vat.csv"))
panel_cit <- fread(file = here("Data", "Intermediate", "panel_cit.csv"))

# Coerce declaration_date to Date class in both panels.
# `:=` updates the column by reference on the data.table returned by fread().
panel_vat[, declaration_date := as.Date(declaration_date)]
panel_cit[, declaration_date := as.Date(declaration_date)]

# ============================================================================
# TASK 1: FILTERING
# ============================================================================

# Filter panel_vat to declarations with vat_outputs > 30000
# Save as 'high_vat'
# TODO: Create high_vat


# Check how many rows high_vat has using nrow()
# TODO: Display number of rows


# ============================================================================
# TASK 2: SELECTING
# ============================================================================

# From panel_vat, select only firm_id, declaration_date, and vat_outputs
# Save as 'vat_selected'
# TODO: Select columns


# Display the first few rows using head()
# TODO: Display first rows


# ============================================================================
# TASK 3: MUTATING
# ============================================================================

# Starting from panel_vat, create two new columns:
# - net_vat (vat_outputs - vat_inputs)
# - is_refund (TRUE if net_vat < 0, FALSE otherwise)
# Save as 'vat_with_flags'
# TODO: Create new columns


# Display the first few rows
# TODO: Display first rows


# ============================================================================
# TASK 4: ARRANGING
# ============================================================================

# Sort panel_cit by tax_paid (highest first)
# Show only the top 10 taxpayers
# Save as 'top_taxpayers'
# TODO: Sort and select top 10


# Display the result
# TODO: Display top taxpayers


# ============================================================================
# TASK 5: SUMMARIZING
# ============================================================================

# Calculate summary statistics for taxable_income from panel_cit:
# - total_income (sum)
# - avg_income (mean)
# - median_income (median)
# Save as 'cit_summary'
# TODO: Calculate summary statistics


# Display the result
# TODO: Display summary


# ============================================================================
# TASK 6: GROUPING
# ============================================================================

# Calculate average vat_outputs by filing_year
# Also count declarations per year
# Steps:
# 1. Create filing_year column using year(declaration_date)
# 2. Group by filing_year
# 3. Calculate avg_vat and num_declarations
# Save as 'vat_by_year'
# TODO: Group and summarize


# Display the result
# TODO: Display grouped summary


# ============================================================================
# TASK 7: CHALLENGE
# ============================================================================

# Find the top 3 firms by total net VAT paid in 2022
# Show firm_id and total amount
# Steps:
# 1. Create filing_year column
# 2. Filter to 2022
# 3. Create net_vat column
# 4. Group by firm_id and sum net_vat
# 5. Sort and take top 3
# Save as 'top_firms_2022'
# TODO: Complete challenge


# Display the result
# TODO: Display top firms


# ============================================================================
# END OF EXERCISE 1
# ============================================================================
