Awesome-Agent-Skills-for-Empirical-Research econometrics-julia
Julia-based econometric and structural estimation for computationally intensive tasks. Use for structural models, maximum likelihood, GMM, numerical optimization, simulations, and high-performance computing. Covers DataFrames.jl, FixedEffectModels.jl, Optim.jl, and performance optimization.
install
source · Clone the upstream repo

```sh
git clone https://github.com/brycewang-stanford/Awesome-Agent-Skills-for-Empirical-Research
```
Claude Code · Install into ~/.claude/skills/

```sh
T=$(mktemp -d) && git clone --depth=1 https://github.com/brycewang-stanford/Awesome-Agent-Skills-for-Empirical-Research "$T" && mkdir -p ~/.claude/skills && cp -r "$T/skills/14-luischanci-claude-code-research-starter/dot-claude/skills/econometrics-julia" ~/.claude/skills/brycewang-stanford-awesome-agent-skills-for-empirical-research-econometrics-juli && rm -rf "$T"
```
manifest:
skills/14-luischanci-claude-code-research-starter/dot-claude/skills/econometrics-julia/SKILL.md
Julia Econometrics & Structural Estimation
Core Packages
```julia
using DataFrames, CSV    # Data handling
using FixedEffectModels  # Panel regressions
using Optim              # Optimization
using Distributions      # Probability distributions
using LinearAlgebra      # Matrix operations
using ForwardDiff        # Automatic differentiation
using Statistics, StatsBase
```
Data Handling (DataFrames.jl)
```julia
# Read data
df = CSV.read("data/raw/file.csv", DataFrame)

# Basic operations
transform!(df, :x => (x -> x .* 100) => :x_scaled)
subset!(df, :year => y -> y .>= 2000)

# Groupby operations
gdf = groupby(df, :id)
combine(gdf, :y => mean => :mean_y, nrow => :n)

# Efficient joins
leftjoin!(df, other_df, on = [:id, :year])
```
Panel Regressions (FixedEffectModels.jl)
```julia
# Two-way fixed effects
est = reg(df, @formula(y ~ treatment + fe(id) + fe(year)), Vcov.cluster(:state))

# IV estimation
est_iv = reg(df, @formula(y ~ (endog ~ instrument) + fe(id) + fe(year)))

# Extract results
coef(est)  # Coefficients
vcov(est)  # Variance-covariance matrix
nobs(est)  # Observations
r2(est)    # R-squared
```
Performance Optimization
Memory Efficiency
```julia
# Use views instead of copies
@views y_subset = df.y[1:100]

# Preallocate arrays
result = zeros(n, m)
@inbounds for i in 1:n
    result[i, :] .= compute_row(i)
end

# Use StaticArrays for small fixed-size arrays
using StaticArrays
params = SVector{3, Float64}(1.0, 2.0, 3.0)
```
Broadcasting and Vectorization
```julia
# Good: fused, in-place broadcasting — one pass, writes into a preallocated array
@. result = x^2 + y^2 + z

# Also fused (all operators dotted), but allocates a fresh output array
result = x.^2 .+ y.^2 .+ z

# Avoid: mixing dotted and un-dotted operators breaks fusion and
# creates intermediate arrays
result = (x.^2) + (y.^2) + z
```
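A concrete check of the fused, in-place form, using small hypothetical vectors:

```julia
x = [1.0, 2.0, 3.0]
y = [4.0, 5.0, 6.0]
z = [7.0, 8.0, 9.0]

result = similar(x)        # preallocate the output once
@. result = x^2 + y^2 + z  # one fused loop, no temporary arrays
# result == [24.0, 37.0, 54.0]
```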
Type Stability
```julia
# Annotate function return types for performance-critical code
function likelihood(θ::Vector{Float64}, data::Matrix{Float64})::Float64
    # Implementation
end

# Use concrete (or parametric) types in structs
struct ModelParams{T<:Real}
    α::T
    β::T
    σ::T
end
```
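A minimal illustration of what instability looks like, using a hypothetical clamp-at-zero function: the unstable version can return either an `Int` or a `Float64` depending on a runtime value, while `zero(x)` pins the return type to the input's type.

```julia
# Type-unstable: a Float64 input can yield the Int literal 0
relu_unstable(x) = x > 0 ? x : 0

# Type-stable: zero(x) matches the input's type, so the return type is fixed
relu_stable(x) = x > 0 ? x : zero(x)
```

Running `@code_warntype relu_unstable(1.0)` flags the `Union{Int64, Float64}` return type; the stable version infers cleanly.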
Structural Estimation
Maximum Likelihood
```julia
function log_likelihood(θ, data)
    α, β, σ = θ
    y, X = data[:, 1], data[:, 2:end]
    resid = y .- X * [α, β]
    ll = -0.5 * length(y) * log(2π * σ^2) - sum(resid.^2) / (2σ^2)
    return -ll  # Return the negative: Optim minimizes
end

# Optimize
θ0 = [0.0, 0.0, 1.0]
result = optimize(θ -> log_likelihood(θ, data), θ0, LBFGS(), autodiff = :forward)
θ_hat = Optim.minimizer(result)
```
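Asymptotic standard errors come from the inverse Hessian of the negative log-likelihood at the estimate. With the packages above, `ForwardDiff.hessian(negll, θ_hat)` gives the Hessian directly; the sketch below uses a hand-rolled central-difference Hessian (`fd_hessian` and `mle_se` are hypothetical helper names) so it runs with the standard library alone.

```julia
using LinearAlgebra

# Central-difference Hessian — a dependency-free stand-in for ForwardDiff.hessian
function fd_hessian(f, θ; h = 1e-4)
    k = length(θ)
    H = zeros(k, k)
    for i in 1:k, j in 1:k
        ei = [m == i ? h : 0.0 for m in 1:k]
        ej = [m == j ? h : 0.0 for m in 1:k]
        H[i, j] = (f(θ + ei + ej) - f(θ + ei - ej) -
                   f(θ - ei + ej) + f(θ - ei - ej)) / (4h^2)
    end
    return (H + H') / 2  # symmetrize away floating-point noise
end

# SEs: sqrt of the diagonal of the inverse Hessian of the *negative*
# log-likelihood, evaluated at θ̂
mle_se(negll, θ_hat) = sqrt.(diag(inv(fd_hessian(negll, θ_hat))))
```

For example, the negative log-likelihood of a normal mean with σ = 1 and n = 100 observations is (up to a constant) `θ -> 50 * (θ[1] - ȳ)^2`, whose implied SE is 1/√100 = 0.1.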
Generalized Method of Moments (GMM)
```julia
function gmm_moments(θ, data, Z)
    α, β = θ
    y, X = data[:, 1], data[:, 2:end]
    resid = y .- X * [α, β]
    # Moment conditions: E[Z'ε] = 0
    return Z' * resid / size(Z, 1)
end

function gmm_objective(θ, data, Z, W)
    g = gmm_moments(θ, data, Z)
    return g' * W * g
end

# Two-step GMM
W1 = I(size(Z, 2))  # First step: identity weighting
θ1 = optimize(θ -> gmm_objective(θ, data, Z, W1), θ0).minimizer

# Second step: optimal weighting matrix
Ω = cov(Z .* (data[:, 1] .- data[:, 2:end] * θ1))
W2 = inv(Ω)
θ2 = optimize(θ -> gmm_objective(θ, data, Z, W2), θ1).minimizer
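A useful sanity check: in the just-identified case (instruments = regressors), GMM solves the moment conditions exactly and reproduces OLS. A self-contained sketch on simulated data (all names and true coefficients are made up):

```julia
using LinearAlgebra, Random

Random.seed!(1)
N = 500
X = [ones(N) randn(N)]       # constant + one regressor (simulated)
y = X * [1.0, 2.0] + randn(N)

Z = X                        # just-identified: instruments = regressors
g(β) = Z' * (y - X * β) / N  # sample moments, E[Z'ε] = 0

# Just-identified GMM sets g(β) = 0 exactly — no weighting matrix needed
β_gmm = (Z' * X) \ (Z' * y)
```

Here `β_gmm` equals the OLS estimate `X \ y`, and `g(β_gmm)` is numerically zero, which is a quick way to verify a `gmm_moments` implementation before moving to over-identified models.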
Simulation-Based Estimation (MSM/Indirect Inference)
```julia
using Random

function simulate_model(θ, N, S; seed = 12345)
    Random.seed!(seed)
    α, β, σ = θ
    # Simulate S datasets of size N
    sims = Matrix{Float64}(undef, N, S)
    for s in 1:S
        X = randn(N)
        ε = σ * randn(N)
        sims[:, s] = α .+ β .* X .+ ε
    end
    return sims
end

function msm_objective(θ, data_moments, N, S)
    sims = simulate_model(θ, N, S)
    sim_moments = mean([compute_moments(sims[:, s]) for s in 1:S])
    diff = data_moments - sim_moments
    return diff' * diff
end
```
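The sketch above leaves `compute_moments` unspecified; its choice depends on which features of the data identify the parameters. One hypothetical version matching the mean, variance, and skewness of an outcome series:

```julia
using Statistics

# Hypothetical moment vector: mean, variance, and (standardized) skewness
function compute_moments(y)
    m, s = mean(y), std(y)
    return [m, var(y), mean(((y .- m) ./ s) .^ 3)]
end
```

Since `mean` of a vector of vectors averages element-wise, this drops straight into the `sim_moments` line of `msm_objective`.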
Numerical Methods
Root Finding (for equilibrium models)
```julia
using NLsolve

function excess_demand(p, params)
    # Market-clearing conditions
    return demand(p, params) - supply(p, params)
end

result = nlsolve(p -> excess_demand(p, params), p0)
p_eq = result.zero
```
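For a scalar price, the same idea can be sketched without NLsolve: bisection on the excess-demand function. The linear demand and supply curves below are made up purely for illustration (`bisect` is a hypothetical helper, not an NLsolve API).

```julia
# Hypothetical linear market: 10 - 2p = 1 + p clears at p = 3
demand(p) = 10.0 - 2.0p
supply(p) = 1.0 + p
excess(p) = demand(p) - supply(p)

# Simple bisection: requires excess(lo) and excess(hi) to bracket a root
function bisect(f, lo, hi; tol = 1e-10)
    while hi - lo > tol
        mid = (lo + hi) / 2
        f(lo) * f(mid) <= 0 ? (hi = mid) : (lo = mid)
    end
    return (lo + hi) / 2
end

p_eq = bisect(excess, 0.0, 10.0)  # ≈ 3.0
```

For systems of prices, `nlsolve` (typically with a Newton or trust-region method) is the right tool; bisection only works in one dimension.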
Integration (for expected values)
```julia
using QuadGK

# One-dimensional
expected_value, err = quadgk(x -> x * pdf(Normal(μ, σ), x), -Inf, Inf)

# Multi-dimensional (Monte Carlo or sparse grids for high dimensions)
using HCubature
integral, err = hcubature(f, lower_bounds, upper_bounds)
```
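When quadrature is impractical (high dimensions, simulated shocks), plain Monte Carlo is the usual fallback: draw from the distribution and average. A stdlib-only sketch for E[X²] with X ~ N(1, 2), where the exact answer is μ² + σ² = 5:

```julia
using Random, Statistics

Random.seed!(7)
μ, σ = 1.0, 2.0
draws = μ .+ σ .* randn(1_000_000)  # one million draws from N(μ, σ)
ev = mean(x -> x^2, draws)          # Monte Carlo estimate of E[X²] ≈ 5
```

The Monte Carlo standard error shrinks at rate 1/√S in the number of draws S, independent of dimension, which is why it scales better than tensor-product quadrature.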
Parallel Computing
```julia
using Distributed, SharedArrays
addprocs(4)
@everywhere using StatsBase

# Parallel bootstrap: write into a SharedArray so all workers share the memory
θ_boot = SharedVector{Float64}(B)
@sync @distributed for b in 1:B
    bootstrap_sample = sample(1:N, N, replace = true)
    θ_boot[b] = estimate(data[bootstrap_sample, :])
end

# Or use threads for shared-memory parallelism within one process
Threads.@threads for i in 1:N
    result[i] = compute(i)
end
```
Standard Errors
Bootstrap
```julia
function bootstrap_se(estimate_fn, data, B = 1000)
    N = size(data, 1)
    θ_boot = [estimate_fn(data[sample(1:N, N, replace = true), :]) for _ in 1:B]
    return std(θ_boot)
end
```
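A quick sanity check of the helper on a case with a known answer: the SE of a sample mean of N iid standard normals is 1/√N. The snippet below redefines the function with the stdlib `rand(1:N, N)` in place of `StatsBase.sample` so it is self-contained; the data and estimator are made up.

```julia
using Random, Statistics

function bootstrap_se(estimate_fn, data, B = 1000)
    N = size(data, 1)
    θ_boot = [estimate_fn(data[rand(1:N, N), :]) for _ in 1:B]
    return std(θ_boot)
end

Random.seed!(42)
data = randn(10_000, 1)                          # one column: the outcome
se_hat = bootstrap_se(d -> mean(d[:, 1]), data, 500)
# se_hat should be close to 1/√10_000 = 0.01
```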
Delta Method
```julia
using ForwardDiff

function delta_method_se(g, θ, Σ)
    # g: scalar transformation of the parameters
    # θ: parameter estimates
    # Σ: variance-covariance matrix of θ
    ∇g = ForwardDiff.gradient(g, θ)
    return sqrt(∇g' * Σ * ∇g)
end
```
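A hand-checked example for the ratio g(θ) = θ₁/θ₂, writing the gradient out analytically so no packages are needed (the estimates and covariance matrix are hypothetical):

```julia
using LinearAlgebra

θ = [2.0, 4.0]                   # hypothetical estimates
Σ = [0.04 0.0; 0.0 0.09]         # hypothetical vcov of θ

# g(θ) = θ[1] / θ[2]; gradient [1/θ₂, -θ₁/θ₂²] written out by hand
∇g = [1 / θ[2], -θ[1] / θ[2]^2]  # = [0.25, -0.125]
se_ratio = sqrt(∇g' * Σ * ∇g)    # = √(0.25²·0.04 + 0.125²·0.09) = 0.0625
```

Substituting `ForwardDiff.gradient(θ -> θ[1] / θ[2], θ)` for the hand-written gradient gives the same number, which is a handy way to unit-test `delta_method_se`.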
Best Practices
- Use `@time` and `@btime` (from BenchmarkTools) to profile
- Check type stability with `@code_warntype`
- Use `const` for global constants
- Prefer column-major iteration (Julia is column-major)
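The column-major point in practice: keep the row index innermost so the loop walks memory sequentially. A minimal sketch (the function name is made up):

```julia
# Column-major traversal: j (column) outer, i (row) inner — sequential reads
function colmajor_sum(A)
    s = 0.0
    @inbounds for j in axes(A, 2), i in axes(A, 1)
        s += A[i, j]
    end
    return s
end
```

Swapping the loop order gives identical results but strided memory access, which is markedly slower on large matrices.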