Integrating with Vercel SDK for Deployment

Now that you’ve mastered FireCrawl’s scraping capabilities, it’s time to deploy your applications to production. Vercel provides an excellent serverless platform for hosting web scraping services with automatic scaling, edge computing, and robust infrastructure.

Understanding Vercel’s Serverless Architecture

Benefits for Web Scraping Applications

  • Automatic Scaling: Handle traffic spikes without manual intervention
  • Edge Computing: Deploy functions closer to users globally
  • Zero Configuration: Deploy with minimal setup
  • Cost-Effective: Pay only for actual usage
  • Built-in Monitoring: Track performance and errors

Vercel Functions vs Traditional Servers

// Traditional server approach (not recommended for scraping)
// Registers a GET /scrape route on `app`.
// NOTE(review): `app`, `urls` and `scrapeMultiplePages` are not defined in this
// snippet — presumably an Express-style router and project helpers; confirm.
app.get('/scrape', async (req, res) => {
// Long-running process that blocks the server
// The JSON response is only sent once scrapeMultiplePages(urls) has resolved.
const result = await scrapeMultiplePages(urls);
res.json(result);
});
// Vercel serverless approach (recommended)
/**
 * Illustrative Vercel function: awaits a single scrape and returns it as JSON.
 *
 * NOTE(review): `url` and `scrapeWithTimeout` are not defined in this snippet;
 * the second argument (30000) is presumably a timeout in milliseconds — confirm.
 */
export default async function handler(req: VercelRequest, res: VercelResponse) {
// Stateless, scalable function
const result = await scrapeWithTimeout(url, 30000);
return res.json(result);
}

Setting Up Your Vercel Project

Project Structure for Vercel

scraping-service/
├── api/
│ ├── scrape.ts
│ ├── crawl.ts
│ ├── analyze.ts
│ └── auth/
│ └── validate.ts
├── lib/
│ ├── firecrawl.ts
│ ├── auth.ts
│ ├── rate-limiter.ts
│ └── validators.ts
├── public/
│ └── dashboard/
├── vercel.json
├── package.json
└── .env.local

Vercel Configuration

vercel.json
{
"functions": {
"api/scrape.ts": {
"maxDuration": 60
},
"api/crawl.ts": {
"maxDuration": 300
}
},
"env": {
"FIRECRAWL_API_KEY": "@firecrawl-api-key",
"JWT_SECRET": "@jwt-secret",
"DATABASE_URL": "@database-url"
},
"headers": [
{
"source": "/api/(.*)",
"headers": [
{
"key": "Access-Control-Allow-Origin",
"value": "*"
},
{
"key": "Access-Control-Allow-Methods",
"value": "GET, POST, PUT, DELETE, OPTIONS"
},
{
"key": "Access-Control-Allow-Headers",
"value": "Content-Type, Authorization"
}
]
}
]
}

Building Serverless API Endpoints

Basic Scraping Endpoint

api/scrape.ts
import { VercelRequest, VercelResponse } from '@vercel/node';
import { firecrawlApp } from '../lib/firecrawl';
import { validateApiKey } from '../lib/auth';
import { rateLimiter } from '../lib/rate-limiter';
/**
 * JSON body accepted by the scrape endpoint handler in this file.
 */
interface ScrapeRequest {
// Page to scrape; the handler answers 400 unless isValidUrl(url) passes.
url: string;
// Forwarded as the single entry of `formats`; the handler defaults it to 'markdown'.
format?: 'markdown' | 'html' | 'structured';
// When set, the handler adds extractorOptions with mode 'llm-extraction' and this prompt.
extractionPrompt?: string;
options?: {
// The handler falls back to true when this is omitted.
onlyMainContent?: boolean;
// Both tag lists are forwarded unchanged to the scrape call.
includeTags?: string[];
excludeTags?: string[];
};
}
/**
 * Response envelope produced by the scrape endpoint.
 *
 * `data` is typed as `unknown` rather than `any`: the payload comes straight
 * from the scraping backend, so consumers must narrow it before use.
 */
interface ScrapeResponse {
  /** True when the scrape completed and `data` is populated. */
  success: boolean;
  /** Scrape result exactly as returned by the backend. */
  data?: unknown;
  /** Human-readable failure reason. */
  error?: string;
  metadata?: {
    /** Milliseconds elapsed, computed as `Date.now() - startTime` by the handler. */
    processingTime: number;
    /** ISO-8601 timestamp produced with `new Date().toISOString()`. */
    timestamp: string;
    /** Credits charged for the request. */
    creditsUsed: number;
  };
}
/**
 * POST /api/scrape — authenticates the caller, applies rate limiting, validates
 * the body and scrapes a single URL through `firecrawlApp.scrapeUrl`.
 *
 * Responses:
 * - 200 with a `ScrapeResponse` on success (OPTIONS preflight gets an empty 200)
 * - 400 when `url` is missing or not a valid URL
 * - 401 when the bearer token is rejected by `validateApiKey`
 * - 405 for any method other than POST/OPTIONS
 * - 429 when the rate limiter denies the request (with a `Retry-After` header)
 * - 500 when the scrape fails or an unexpected error is thrown
 */
export default async function handler(
  req: VercelRequest,
  res: VercelResponse
): Promise<VercelResponse> {
  const startTime = Date.now();

  // Handle CORS preflight
  if (req.method === 'OPTIONS') {
    return res.status(200).end();
  }

  if (req.method !== 'POST') {
    return res.status(405).json({
      success: false,
      error: 'Method not allowed'
    });
  }

  try {
    // Validate API key
    const apiKey = req.headers.authorization?.replace('Bearer ', '');
    const user = await validateApiKey(apiKey);
    if (!user) {
      return res.status(401).json({
        success: false,
        error: 'Invalid API key'
      });
    }

    // Rate limiting. The plan must be passed explicitly: `check` defaults to
    // the 'free' tier, which would throttle paying users at free-tier limits.
    const rateLimitResult = await rateLimiter.check(user.id, user.plan);
    if (!rateLimitResult.allowed) {
      if (rateLimitResult.retryAfter !== undefined) {
        // Standard header so HTTP clients can back off without parsing the body.
        res.setHeader('Retry-After', String(rateLimitResult.retryAfter));
      }
      return res.status(429).json({
        success: false,
        error: 'Rate limit exceeded',
        retryAfter: rateLimitResult.retryAfter
      });
    }

    // Validate request body. A missing/null body must yield a 400, not a
    // destructuring TypeError that surfaces as a 500.
    const {
      url,
      format = 'markdown',
      extractionPrompt,
      options
    }: Partial<ScrapeRequest> = req.body ?? {};

    if (typeof url !== 'string' || !isValidUrl(url)) {
      return res.status(400).json({
        success: false,
        error: 'Valid URL is required'
      });
    }

    // Perform scraping
    const scrapeOptions = {
      formats: [format],
      onlyMainContent: options?.onlyMainContent ?? true,
      includeTags: options?.includeTags,
      excludeTags: options?.excludeTags,
      // Only attach LLM extraction settings when a prompt was supplied.
      ...(extractionPrompt && {
        extractorOptions: {
          mode: 'llm-extraction' as const,
          extractionPrompt
        }
      })
    };

    const result = await firecrawlApp.scrapeUrl(url, scrapeOptions);
    if (!result.success) {
      return res.status(500).json({
        success: false,
        error: result.error || 'Scraping failed'
      });
    }

    const response: ScrapeResponse = {
      success: true,
      data: result.data,
      metadata: {
        processingTime: Date.now() - startTime,
        timestamp: new Date().toISOString(),
        creditsUsed: 1 // This would come from FireCrawl's response
      }
    };
    return res.status(200).json(response);
  } catch (error) {
    console.error('Scraping error:', error);
    return res.status(500).json({
      success: false,
      error: 'Internal server error',
      metadata: {
        processingTime: Date.now() - startTime,
        timestamp: new Date().toISOString(),
        creditsUsed: 0
      }
    });
  }
}
/**
 * Returns true only for absolute URLs that use the http or https scheme.
 *
 * Parsing alone is not enough: `new URL` also accepts schemes such as
 * `javascript:` or `file:`, which must never be handed to the scraper.
 *
 * @param url - Candidate URL string taken from the request body.
 * @returns Whether `url` parses and uses http/https.
 */
function isValidUrl(url: string): boolean {
  try {
    const { protocol } = new URL(url);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    // `new URL` throws on anything it cannot parse.
    return false;
  }
}

Batch Crawling Endpoint

api/crawl.ts
import { VercelRequest, VercelResponse } from '@vercel/node';
import { firecrawlApp } from '../lib/firecrawl';
import { validateApiKey } from '../lib/auth';
/**
 * JSON body accepted by the crawl endpoint handler in this file.
 */
interface CrawlRequest {
// Start URL of the crawl; the handler answers 400 when it is missing.
url: string;
// Passed to the crawl as `limit`; the handler defaults it to 10.
maxPages?: number;
// Forwarded as `includePaths` / `excludePaths` respectively.
includePatterns?: string[];
excludePatterns?: string[];
// When set, the handler adds extractorOptions with mode 'llm-extraction' and this prompt.
extractionPrompt?: string;
// Included in the crawl options when truthy and echoed back in the response (null otherwise).
webhook?: string;
}
/** Returns true when `value` parses as an absolute http(s) URL. */
function isHttpUrl(value: string): boolean {
  try {
    const { protocol } = new URL(value);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    return false;
  }
}

/**
 * POST /api/crawl — authenticates the caller, validates the body and hands the
 * crawl to `firecrawlApp.crawlUrl`.
 *
 * Responses:
 * - 200 with `{ success, jobId, message, estimatedPages, webhook }`
 *   (OPTIONS preflight gets an empty 200, matching the scrape endpoint)
 * - 400 when `url` is missing/invalid or `maxPages` is not a positive integer
 * - 401 when the bearer token is rejected by `validateApiKey`
 * - 405 for any method other than POST/OPTIONS
 * - 500 when the crawl call fails or an unexpected error is thrown
 */
export default async function handler(
  req: VercelRequest,
  res: VercelResponse
): Promise<VercelResponse> {
  // Handle CORS preflight; without this, browsers sending an Authorization
  // header are rejected with 405 before the real request is made.
  if (req.method === 'OPTIONS') {
    return res.status(200).end();
  }

  if (req.method !== 'POST') {
    return res.status(405).json({ error: 'Method not allowed' });
  }

  try {
    // Authentication
    const apiKey = req.headers.authorization?.replace('Bearer ', '');
    const user = await validateApiKey(apiKey);
    if (!user) {
      return res.status(401).json({ error: 'Invalid API key' });
    }

    // A missing/null body must yield a 400, not a destructuring TypeError.
    const {
      url,
      maxPages = 10,
      includePatterns,
      excludePatterns,
      extractionPrompt,
      webhook
    }: Partial<CrawlRequest> = req.body ?? {};

    if (!url) {
      return res.status(400).json({ error: 'URL is required' });
    }
    if (typeof url !== 'string' || !isHttpUrl(url)) {
      return res.status(400).json({ error: 'URL must be a valid http(s) URL' });
    }
    if (!Number.isInteger(maxPages) || maxPages < 1) {
      return res.status(400).json({ error: 'maxPages must be a positive integer' });
    }

    // Start crawling job
    // NOTE(review): depending on the installed Firecrawl SDK version, `crawlUrl`
    // may wait for the crawl to finish instead of returning a job id
    // immediately — confirm against the SDK before relying on `jobId`.
    const crawlOptions = {
      limit: maxPages,
      scrapeOptions: {
        formats: ['markdown'] as const,
        onlyMainContent: true,
        // Only attach LLM extraction settings when a prompt was supplied.
        ...(extractionPrompt && {
          extractorOptions: {
            mode: 'llm-extraction' as const,
            extractionPrompt
          }
        })
      },
      includePaths: includePatterns,
      excludePaths: excludePatterns,
      ...(webhook && { webhook })
    };

    const crawlResult = await firecrawlApp.crawlUrl(url, crawlOptions);
    if (!crawlResult.success) {
      return res.status(500).json({
        success: false,
        error: crawlResult.error
      });
    }

    return res.status(200).json({
      success: true,
      jobId: crawlResult.jobId,
      message: 'Crawl job started',
      estimatedPages: maxPages,
      webhook: webhook || null
    });
  } catch (error) {
    console.error('Crawl error:', error);
    return res.status(500).json({
      success: false,
      error: 'Internal server error'
    });
  }
}

Authentication and Security

JWT-based Authentication System

lib/auth.ts
import jwt from 'jsonwebtoken';
import bcrypt from 'bcryptjs';
/**
 * Account record resolved from a validated API key (see AuthService.validateApiKey).
 */
export interface User {
// Identifier embedded in issued API keys as `userId`.
id: string;
email: string;
// NOTE(review): the mock user in getUserById leaves this as an empty string.
apiKey: string;
// Subscription tier; also embedded in issued API keys.
plan: 'free' | 'pro' | 'enterprise';
// Per-user request quotas.
rateLimit: {
requestsPerHour: number;
requestsPerDay: number;
};
// ISO-8601 string (the mock uses new Date().toISOString()).
createdAt: string;
}
/**
 * Claims carried inside an API key JWT.
 * generateApiKey supplies `userId` and `plan`; `iat`/`exp` are omitted from the
 * payload it builds — presumably filled in by jwt.sign (expiresIn: '1y'); confirm.
 */
export interface ApiKeyPayload {
userId: string;
plan: string;
iat: number;
exp: number;
}
/**
 * Issues and validates JWT-based API keys and wraps bcrypt password hashing.
 *
 * Keys are signed with the `JWT_SECRET` environment variable; construction
 * throws when it is missing so misconfiguration fails fast.
 */
export class AuthService {
  /** Issuer claim stamped on every key and required again on verification. */
  private static readonly ISSUER = 'scraping-service';

  private readonly jwtSecret: string;

  /**
   * @throws Error when `JWT_SECRET` is unset or empty.
   */
  constructor() {
    const secret = process.env.JWT_SECRET;
    if (!secret) {
      throw new Error('JWT_SECRET environment variable is required');
    }
    this.jwtSecret = secret;
  }

  /**
   * Creates a signed API key for `user`, valid for one year.
   *
   * @param user - Account whose `id` and `plan` are embedded in the token.
   * @returns The signed JWT string.
   */
  generateApiKey(user: User): string {
    const payload: Omit<ApiKeyPayload, 'iat' | 'exp'> = {
      userId: user.id,
      plan: user.plan
    };
    return jwt.sign(payload, this.jwtSecret, {
      algorithm: 'HS256',
      expiresIn: '1y',
      issuer: AuthService.ISSUER
    });
  }

  /**
   * Verifies an API key and resolves the user it belongs to.
   *
   * @param apiKey - Raw token, or `undefined` when no credentials were sent.
   * @returns The matching user, or `null` when the key is missing, invalid,
   *          expired, issued by someone else, or refers to an unknown user.
   */
  async validateApiKey(apiKey: string | undefined): Promise<User | null> {
    if (!apiKey) return null;
    try {
      // Pin the algorithm and require our issuer so tokens signed with the
      // same secret for another purpose are not accepted as API keys.
      const decoded = jwt.verify(apiKey, this.jwtSecret, {
        algorithms: ['HS256'],
        issuer: AuthService.ISSUER
      });
      // `verify` may return a bare string payload; only accept our object shape.
      if (typeof decoded === 'string' || typeof decoded.userId !== 'string') {
        return null;
      }
      // In production, fetch user from database
      return await this.getUserById(decoded.userId);
    } catch (error) {
      console.error('API key validation failed:', error);
      return null;
    }
  }

  /** Looks up a user by id; returns `null` when no such user exists. */
  private async getUserById(userId: string): Promise<User | null> {
    // Mock implementation - replace with actual database query
    const mockUsers: Record<string, User> = {
      'user-1': {
        id: 'user-1',
        email: 'user@example.com',
        apiKey: '',
        plan: 'pro',
        rateLimit: {
          requestsPerHour: 1000,
          requestsPerDay: 10000
        },
        createdAt: new Date().toISOString()
      }
    };
    return mockUsers[userId] ?? null;
  }

  /** Hashes `password` with bcrypt using 12 salt rounds. */
  async hashPassword(password: string): Promise<string> {
    return bcrypt.hash(password, 12);
  }

  /** Checks `password` against a bcrypt hash. */
  async verifyPassword(password: string, hashedPassword: string): Promise<boolean> {
    return bcrypt.compare(password, hashedPassword);
  }
}
/** Shared AuthService instance for the whole module. */
export const authService = new AuthService();

/** Standalone validator that always delegates to the shared `authService`. */
export const validateApiKey = (apiKey: string | undefined): Promise<User | null> =>
  authService.validateApiKey(apiKey);

Rate Limiting Implementation

lib/rate-limiter.ts
/** Counter stored per `${userId}:${window}` key by RateLimiter. */
interface RateLimitEntry {
// Requests counted so far in the current window.
count: number;
// Epoch milliseconds at which the window ends (Date.now() + window length).
resetTime: number;
}
/** Outcome of a RateLimiter.check call. */
interface RateLimitResult {
// Whether the request may proceed.
allowed: boolean;
// Requests left in the current window (0 when blocked).
remaining: number;
// Seconds until the window resets; only set when the request is blocked.
retryAfter?: number;
}
/**
 * Fixed-window request counter keyed by user and window ('hour' or 'day').
 *
 * State lives in an in-process Map, so counts are not shared between
 * processes and are lost when the process restarts.
 */
export class RateLimiter {
  private readonly cache = new Map<string, RateLimitEntry>();

  /** Request quotas per plan. */
  private readonly defaultLimits = {
    free: { requestsPerHour: 100, requestsPerDay: 500 },
    pro: { requestsPerHour: 1000, requestsPerDay: 10000 },
    enterprise: { requestsPerHour: 10000, requestsPerDay: 100000 }
  };

  /**
   * Records one request for `userId` and reports whether it is within quota.
   *
   * @param userId - Identifier the counter is keyed on.
   * @param plan - Plan whose quota applies; defaults to 'free'.
   * @param window - Which quota to enforce; defaults to 'hour'.
   * @returns `allowed`/`remaining`, plus `retryAfter` (seconds) when blocked.
   */
  async check(
    userId: string,
    plan: 'free' | 'pro' | 'enterprise' = 'free',
    window: 'hour' | 'day' = 'hour'
  ): Promise<RateLimitResult> {
    const key = `${userId}:${window}`;
    const now = Date.now();
    const windowMs = window === 'hour' ? 60 * 60 * 1000 : 24 * 60 * 60 * 1000;
    const limit = window === 'hour'
      ? this.defaultLimits[plan].requestsPerHour
      : this.defaultLimits[plan].requestsPerDay;

    const entry = this.cache.get(key);

    // `>=` so the window reopens exactly at resetTime; with `>` a blocked
    // caller would be rejected with retryAfter: 0 at that instant.
    if (!entry || now >= entry.resetTime) {
      // New window or expired entry
      this.cache.set(key, {
        count: 1,
        resetTime: now + windowMs
      });
      return {
        allowed: true,
        remaining: limit - 1
      };
    }

    if (entry.count >= limit) {
      return {
        allowed: false,
        remaining: 0,
        retryAfter: Math.ceil((entry.resetTime - now) / 1000)
      };
    }

    // The entry is the object held by the Map, so mutating it is enough.
    entry.count++;
    return {
      allowed: true,
      remaining: limit - entry.count
    };
  }

  /** Removes every entry whose window has already ended. */
  cleanup(): void {
    const now = Date.now();
    for (const [key, entry] of this.cache.entries()) {
      if (now >= entry.resetTime) {
        this.cache.delete(key);
      }
    }
  }
}
/** Shared limiter instance used by the API handlers. */
export const rateLimiter = new RateLimiter();

// Clean up every hour. The timer is unref'd so this housekeeping interval
// never keeps the Node.js process (e.g. a test runner or script) alive.
setInterval(() => rateLimiter.cleanup(), 60 * 60 * 1000).unref();

Building a Web Dashboard

Dashboard API Endpoints

api/dashboard/stats.ts
import { VercelRequest, VercelResponse } from '@vercel/node';
import { validateApiKey } from '../../lib/auth';
/**
 * Payload returned by the dashboard stats handler in this file.
 */
interface DashboardStats {
// Request and credit counters for the authenticated account.
usage: {
requestsToday: number;
requestsThisMonth: number;
creditsUsed: number;
creditsRemaining: number;
};
// Most recent jobs; `processingTime` is absent on the mock job that is still running.
recentJobs: Array<{
id: string;
url: string;
status: 'completed' | 'failed' | 'running';
// ISO-8601 string (the mock uses new Date().toISOString()).
createdAt: string;
// NOTE(review): presumably milliseconds — confirm against the producer.
processingTime?: number;
}>;
// Aggregate metrics; the mock rates sum to 100, so they look like percentages — confirm.
performance: {
averageResponseTime: number;
successRate: number;
errorRate: number;
};
}
/**
 * GET /api/dashboard/stats — returns usage, recent jobs and performance
 * figures for the authenticated caller (currently mock data).
 *
 * Responds 405 for non-GET methods, 401 for a rejected API key and 500 on
 * unexpected errors.
 */
export default async function handler(
  req: VercelRequest,
  res: VercelResponse
): Promise<VercelResponse> {
  const isGet = req.method === 'GET';
  if (!isGet) {
    return res.status(405).json({ error: 'Method not allowed' });
  }

  try {
    const bearerToken = req.headers.authorization?.replace('Bearer ', '');
    const account = await validateApiKey(bearerToken);
    if (!account) {
      return res.status(401).json({ error: 'Invalid API key' });
    }

    // Mock data - replace with actual database queries
    const usage: DashboardStats['usage'] = {
      requestsToday: 45,
      requestsThisMonth: 1250,
      creditsUsed: 890,
      creditsRemaining: 9110
    };

    const recentJobs: DashboardStats['recentJobs'] = [
      {
        id: 'job-1',
        url: 'https://example.com',
        status: 'completed',
        createdAt: new Date().toISOString(),
        processingTime: 2340
      },
      {
        id: 'job-2',
        url: 'https://news.example.com',
        status: 'running',
        createdAt: new Date().toISOString()
      }
    ];

    const performance: DashboardStats['performance'] = {
      averageResponseTime: 2100,
      successRate: 98.5,
      errorRate: 1.5
    };

    const payload: DashboardStats = { usage, recentJobs, performance };
    return res.status(200).json(payload);
  } catch (error) {
    console.error('Dashboard stats error:', error);
    return res.status(500).json({ error: 'Internal server error' });
  }
}

React Dashboard Component

components/Dashboard.tsx
import { useState, useEffect } from 'react';
/**
 * Shape of the JSON the Dashboard component stores from /api/dashboard/stats.
 */
interface DashboardData {
// Counters rendered in the first row of stat cards.
usage: {
requestsToday: number;
requestsThisMonth: number;
creditsUsed: number;
creditsRemaining: number;
};
// Rows rendered in the "Recent Jobs" list.
recentJobs: Array<{
id: string;
url: string;
status: 'completed' | 'failed' | 'running';
// Parsed with new Date(...) for display.
createdAt: string;
// Rendered with an "ms" suffix when present.
processingTime?: number;
}>;
// averageResponseTime is rendered with "ms", the two rates with "%".
performance: {
averageResponseTime: number;
successRate: number;
errorRate: number;
};
}
/**
 * Dashboard view: loads stats from /api/dashboard/stats with the given API key
 * and renders usage cards, performance cards and the recent jobs list.
 *
 * The fetch re-runs whenever `apiKey` changes and is aborted on unmount (or
 * before a re-run) so a stale response never updates state.
 */
export function Dashboard({ apiKey }: { apiKey: string }) {
  const [data, setData] = useState<DashboardData | null>(null);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    const controller = new AbortController();

    const fetchDashboardData = async () => {
      setLoading(true);
      try {
        const response = await fetch('/api/dashboard/stats', {
          headers: {
            'Authorization': `Bearer ${apiKey}`
          },
          signal: controller.signal
        });
        if (response.ok) {
          const dashboardData: DashboardData = await response.json();
          setData(dashboardData);
        } else {
          // Do not keep showing data fetched with a previous key.
          setData(null);
        }
      } catch (error) {
        // An aborted request is expected during cleanup, not a failure.
        if (!controller.signal.aborted) {
          console.error('Failed to fetch dashboard data:', error);
        }
      } finally {
        if (!controller.signal.aborted) {
          setLoading(false);
        }
      }
    };

    void fetchDashboardData();
    return () => controller.abort();
  }, [apiKey]);

  if (loading) {
    return <div className="flex justify-center p-8">Loading...</div>;
  }
  if (!data) {
    return <div className="text-red-500 p-4">Failed to load dashboard data</div>;
  }

  return (
    <div className="max-w-7xl mx-auto p-6 space-y-6">
      {/* Usage Stats */}
      <div className="grid grid-cols-1 md:grid-cols-4 gap-6">
        <StatCard
          title="Requests Today"
          value={data.usage.requestsToday}
          icon="📊"
        />
        <StatCard
          title="This Month"
          value={data.usage.requestsThisMonth}
          icon="📈"
        />
        <StatCard
          title="Credits Used"
          value={data.usage.creditsUsed}
          icon="🔥"
        />
        <StatCard
          title="Credits Remaining"
          value={data.usage.creditsRemaining}
          icon="💰"
        />
      </div>
      {/* Performance Metrics */}
      <div className="grid grid-cols-1 md:grid-cols-3 gap-6">
        <StatCard
          title="Avg Response Time"
          value={`${data.performance.averageResponseTime}ms`}
          icon="⚡"
        />
        <StatCard
          title="Success Rate"
          value={`${data.performance.successRate}%`}
          icon="✅"
        />
        <StatCard
          title="Error Rate"
          value={`${data.performance.errorRate}%`}
          icon="❌"
        />
      </div>
      {/* Recent Jobs */}
      <div className="bg-white rounded-lg shadow p-6">
        <h3 className="text-lg font-semibold mb-4">Recent Jobs</h3>
        <div className="space-y-3">
          {data.recentJobs.map((job) => (
            <div key={job.id} className="flex items-center justify-between p-3 border rounded">
              <div>
                <div className="font-medium">{job.url}</div>
                <div className="text-sm text-gray-500">
                  {new Date(job.createdAt).toLocaleString()}
                </div>
              </div>
              <div className="flex items-center space-x-2">
                <StatusBadge status={job.status} />
                {/* Explicit undefined check: `0 && …` would render a stray "0". */}
                {job.processingTime !== undefined && (
                  <span className="text-sm text-gray-500">
                    {job.processingTime}ms
                  </span>
                )}
              </div>
            </div>
          ))}
        </div>
      </div>
    </div>
  );
}
/** Card showing an icon beside a small title and a prominent value. */
function StatCard(props: { title: string; value: string | number; icon: string }) {
  const { title, value, icon } = props;

  const iconBox = <div className="text-2xl mr-3">{icon}</div>;
  const textBox = (
    <div>
      <div className="text-sm font-medium text-gray-500">{title}</div>
      <div className="text-2xl font-bold">{value}</div>
    </div>
  );

  return (
    <div className="bg-white rounded-lg shadow p-6">
      <div className="flex items-center">
        {iconBox}
        {textBox}
      </div>
    </div>
  );
}
/**
 * Pill-shaped label for a job status.
 *
 * Known statuses get a dedicated colour; anything else falls back to a neutral
 * grey instead of leaking the text "undefined" into the class list.
 */
function StatusBadge({ status }: { status: string }) {
  const colors = new Map<string, string>([
    ['completed', 'bg-green-100 text-green-800'],
    ['failed', 'bg-red-100 text-red-800'],
    ['running', 'bg-blue-100 text-blue-800']
  ]);
  const colorClasses = colors.get(status) ?? 'bg-gray-100 text-gray-800';

  return (
    <span className={`px-2 py-1 text-xs font-medium rounded-full ${colorClasses}`}>
      {status}
    </span>
  );
}

Deployment and Environment Setup

Environment Variables

.env.local
FIRECRAWL_API_KEY=your_firecrawl_api_key
JWT_SECRET=your_jwt_secret_key
DATABASE_URL=your_database_connection_string
VERCEL_URL=your_vercel_domain

Deployment Script

scripts/deploy.ts
import { execSync } from 'child_process';
/**
 * Runs the release pipeline in order — build, tests, production deploy —
 * streaming each command's output to the terminal. The first failing command
 * stops the pipeline, logs the error and exits the process with code 1.
 */
async function deploy() {
  const pipeline = [
    { banner: '📦 Building project...', command: 'npm run build' },
    { banner: '🧪 Running tests...', command: 'npm test' },
    { banner: '🌐 Deploying to Vercel...', command: 'vercel --prod' }
  ];

  console.log('🚀 Starting deployment...');
  try {
    for (const { banner, command } of pipeline) {
      console.log(banner);
      // execSync throws on a non-zero exit code, which aborts the remaining steps.
      execSync(command, { stdio: 'inherit' });
    }
    console.log('✅ Deployment completed successfully!');
  } catch (error) {
    console.error('❌ Deployment failed:', error);
    process.exit(1);
  }
}
// Run the pipeline when this script is executed; deploy() catches its own
// errors and exits the process with code 1 on failure.
deploy();

Package.json Scripts

{
"scripts": {
"dev": "vercel dev",
"build": "tsc",
"test": "jest",
"deploy": "ts-node scripts/deploy.ts",
"deploy:staging": "vercel",
"deploy:prod": "vercel --prod"
}
}

Monitoring and Analytics

Error Tracking

lib/monitoring.ts
/**
 * Logging helpers for errors, API calls and usage events.
 *
 * Every method currently writes a structured object to the console; the
 * comments mark where a real monitoring/analytics backend would be called.
 */
export class MonitoringService {
  /**
   * Logs an error with optional context.
   *
   * Accepts `unknown` so values caught in `catch` blocks can be passed
   * directly; non-Error values are stringified for the `message` field and
   * carry no stack.
   *
   * @param error - The thrown value.
   * @param context - Extra data to attach to the log entry.
   */
  static logError(error: unknown, context: unknown = {}): void {
    const errorLog = {
      message: error instanceof Error ? error.message : String(error),
      stack: error instanceof Error ? error.stack : undefined,
      timestamp: new Date().toISOString(),
      context,
      environment: process.env.NODE_ENV
    };
    // In production, send to monitoring service (Sentry, LogRocket, etc.)
    console.error('Error logged:', errorLog);
  }

  /**
   * Logs the outcome of one API call.
   *
   * @param endpoint - Endpoint identifier.
   * @param duration - Duration value recorded as-is.
   * @param success - Whether the call succeeded.
   */
  static logApiCall(endpoint: string, duration: number, success: boolean): void {
    const log = {
      endpoint,
      duration,
      success,
      timestamp: new Date().toISOString()
    };
    // Send to analytics service
    console.log('API call logged:', log);
  }

  /**
   * Logs a usage event for a user.
   *
   * @param userId - User the event belongs to.
   * @param action - Name of the action performed.
   * @param metadata - Extra data to attach to the event.
   */
  static trackUsage(userId: string, action: string, metadata: unknown = {}): void {
    const usage = {
      userId,
      action,
      metadata,
      timestamp: new Date().toISOString()
    };
    // Send to analytics platform
    console.log('Usage tracked:', usage);
  }
}

Key Takeaways

  • Vercel provides excellent serverless infrastructure for web scraping applications
  • Implement proper authentication and rate limiting for production APIs
  • Use environment variables for sensitive configuration
  • Build monitoring and analytics into your application from the start
  • Create user-friendly dashboards for API management
  • Test thoroughly before deploying to production

Next Steps

In Part 5, we’ll build a complete production web scraping application that combines everything we’ve learned:

  • Full-stack application architecture
  • Real-time job processing
  • Advanced monitoring and alerting
  • User management and billing
  • Performance optimization at scale

You now have the foundation to deploy scalable web scraping services on Vercel!

Share Feedback