# Exercise 3: Complete Transformation Workflow
# Module 3: Data Wrangling with dplyr
# ============================================================================

# Load required packages
library(dplyr)
library(lubridate)
library(data.table)
library(here)
library(janitor)

# Load data from Intermediate folder.
# The path is passed via `file =` so fread() always treats it as a file on
# disk and never as a shell command. Supplying `cmd = FALSE` together with a
# positional input makes fread() stop with "Used more than one of the
# arguments input=, file=, text= and cmd=", because `cmd` is then non-NULL.
panel_vat <- fread(file = here("Data", "Intermediate", "panel_vat.csv"))
panel_cit <- fread(file = here("Data", "Intermediate", "panel_cit.csv"))
dt_firms <- fread(file = here("Data", "Intermediate", "dt_firms.csv"))

# Convert date columns to the base Date class
panel_vat$declaration_date <- as.Date(panel_vat$declaration_date)
panel_cit$declaration_date <- as.Date(panel_cit$declaration_date)

# ============================================================================
# TASK 1: TRANSFORM PANEL_VAT
# ============================================================================

# Create panel_vat_clean with the following transformations:
# 1. Add date components: filing_year, filing_quarter, filing_month
# 2. Calculate net_vat (outputs - inputs) and vat_ratio (inputs / outputs)
# 3. Create flags: is_refund (net_vat < 0), large_taxpayer (outputs > 40000),
#    high_ratio (vat_ratio > 0.9)
# 4. Calculate days_since_last: days since the previous declaration, per firm

# TODO: Create panel_vat_clean with all transformations


# Save to Clean folder
# TODO: Save panel_vat_clean.csv


# ============================================================================
# TASK 2: TRANSFORM PANEL_CIT
# ============================================================================

# Create panel_cit_clean with:
# 1. Add date components: filing_year, filing_quarter
# 2. Calculate effective_tax_rate (tax_paid / taxable_income)
# 3. Create flag: has_adjustments (adjustments != 0)

# TODO: Create panel_cit_clean with all transformations


# Save to Clean folder
# TODO: Save panel_cit_clean.csv


# ============================================================================
# TASK 3: TRANSFORM DT_FIRMS
# ============================================================================

# Clean column names using janitor::clean_names()
# TODO: Create dt_firms_clean


# Save to Clean folder
# TODO: Save dt_firms_clean.csv


# ============================================================================
# TASK 4: VERIFY YOUR WORK
# ============================================================================

# Check that all files exist
# TODO: Check panel_vat_clean.csv exists


# TODO: Check panel_cit_clean.csv exists


# TODO: Check dt_firms_clean.csv exists


# Load files back and verify columns
# TODO: Load panel_vat_clean and check column names


# TODO: Load panel_cit_clean and check column names


# TODO: Load dt_firms_clean and check column names


# Use glimpse() to verify the structure
# TODO: Glimpse all three datasets


# ============================================================================
# END OF EXERCISE 3
# ============================================================================
