import * as fs from 'fs';
import * as path from 'path';
import * as cheerio from 'cheerio';
import axios from 'axios';
import https from 'https';

/**
 * Page that is scraped for measurements.
 * Named SOURCE_URL (not URL) so it does not shadow the global `URL` class.
 */
const SOURCE_URL = 'https://www.pvl.cz/portal/nadrze/cz/pc/Mereni.aspx?oid=1&id=VLL1';

/** JSON file that accumulates every record scraped so far. */
const DATA_FILE = path.resolve('public/data/lipno.json');

/** Abort the HTTP request if the server has not answered within this many milliseconds. */
const REQUEST_TIMEOUT_MS = 30000;

/** A table row is treated as a measurement only if its first cell looks like `DD.MM.YYYY HH:MM`. */
const ROW_DATE_PATTERN = /^\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2}$/;

/** Captures day, month, year, hours and minutes; tolerates any run of whitespace between date and time. */
const DATE_TIME_PARTS = /^(\d{1,2})\.(\d{1,2})\.(\d{4})\s+(\d{1,2}):(\d{1,2})$/;

/** One measurement row as stored in the data file. */
interface DataRecord {
  /** ISO-8601 timestamp as produced by `Date.prototype.toISOString()`. */
  timestamp: string;
  /** Numeric value of the second table column. */
  level: number;
  /** Numeric value of the third table column. */
  flow: number;
}

/**
 * Converts a `DD.MM.YYYY HH:MM` string to an ISO-8601 string.
 *
 * The components are interpreted in the local timezone of the machine running
 * the script, then converted to UTC by `toISOString()`.
 * NOTE(review): the page presumably publishes Czech local time; confirm that
 * the runner's timezone matches, otherwise stored timestamps are shifted.
 *
 * @param dateStr Text in the form `DD.MM.YYYY HH:MM` (surrounding whitespace is ignored).
 * @returns The ISO-8601 representation of that instant.
 * @throws Error if the text does not match the expected format.
 */
function parseDateString(dateStr: string): string {
  // Splitting on a literal ' ' broke on tabs / non-breaking spaces that the
  // row filter (which uses \s) lets through; match the parts with \s+ instead.
  const match = DATE_TIME_PARTS.exec(dateStr.trim());
  if (match === null) {
    throw new Error(`Unrecognized date format: "${dateStr}"`);
  }
  const [, day, month, year, hours, minutes] = match;
  const parsed = new Date(
    parseInt(year ?? '', 10),
    parseInt(month ?? '', 10) - 1, // Date months are zero-based
    parseInt(day ?? '', 10),
    parseInt(hours ?? '', 10),
    parseInt(minutes ?? '', 10),
  );
  return parsed.toISOString();
}

/** Parses a number that uses a decimal comma (e.g. `724,15`); returns NaN when no number is present. */
function parseDecimal(text: string): number {
  return parseFloat(text.trim().replace(',', '.'));
}

/** Runtime check that a value read from disk has the shape of a usable `DataRecord`. */
function isDataRecord(value: unknown): value is DataRecord {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const { timestamp, level, flow } = value as Record<string, unknown>;
  return (
    typeof timestamp === 'string' &&
    !Number.isNaN(Date.parse(timestamp)) &&
    typeof level === 'number' &&
    Number.isFinite(level) &&
    typeof flow === 'number' &&
    Number.isFinite(flow)
  );
}

/**
 * Extracts measurement records from the page HTML.
 *
 * Every `<tr>` with at least three `<td>` cells whose first cell matches
 * `DD.MM.YYYY HH:MM` yields one record; rows whose level or flow cell does not
 * contain a number are skipped so that NaN (serialized as `null`) never
 * reaches the data file.
 */
function extractRecords(html: string): DataRecord[] {
  const $ = cheerio.load(html);
  const records: DataRecord[] = [];

  $('table tr').each((_index, row) => {
    const tds = $(row).find('td');
    if (tds.length < 3) {
      return;
    }
    const datetimeText = $(tds[0]).text().trim();
    if (!ROW_DATE_PATTERN.test(datetimeText)) {
      return; // header row or unrelated table
    }
    const level = parseDecimal($(tds[1]).text());
    const flow = parseDecimal($(tds[2]).text());
    if (!Number.isFinite(level) || !Number.isFinite(flow)) {
      return;
    }
    records.push({ timestamp: parseDateString(datetimeText), level, flow });
  });

  return records;
}

/**
 * Reads the records already stored in `file`.
 *
 * @returns An empty array when the file does not exist yet.
 * @throws Error when the file is not valid JSON or does not contain an array;
 *   failing here (instead of starting from scratch) keeps the existing file
 *   from being overwritten with only the freshly scraped rows.
 */
function loadExistingData(file: string): DataRecord[] {
  if (!fs.existsSync(file)) {
    return [];
  }
  const parsed: unknown = JSON.parse(fs.readFileSync(file, 'utf-8'));
  if (!Array.isArray(parsed)) {
    throw new Error(`${file} does not contain a JSON array`);
  }
  const valid = parsed.filter(isDataRecord);
  if (valid.length !== parsed.length) {
    console.warn(`Ignoring ${parsed.length - valid.length} malformed record(s) in ${file}`);
  }
  return valid;
}

/**
 * Merges two record lists, deduplicating by timestamp (a fresh record replaces
 * a stored one with the same timestamp) and sorting chronologically.
 */
function mergeRecords(existing: readonly DataRecord[], fresh: readonly DataRecord[]): DataRecord[] {
  const byTimestamp = new Map<string, DataRecord>();
  for (const record of existing) {
    byTimestamp.set(record.timestamp, record);
  }
  for (const record of fresh) {
    byTimestamp.set(record.timestamp, record);
  }
  return Array.from(byTimestamp.values()).sort(
    (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime(),
  );
}

/**
 * Downloads the measurement page, extracts its rows, merges them into
 * `DATA_FILE` and writes the result back as pretty-printed JSON.
 *
 * Any failure is logged and terminates the process with exit code 1; the
 * returned promise therefore never rejects.
 */
async function scrape(): Promise<void> {
  try {
    // SECURITY(review): certificate verification is disabled, so the response
    // can be tampered with in transit. Kept as in the original (presumably a
    // workaround for the server's certificate chain); prefer supplying the
    // missing CA certificate and removing this flag.
    const agent = new https.Agent({ rejectUnauthorized: false });
    const response = await axios.get<string>(SOURCE_URL, {
      httpsAgent: agent,
      timeout: REQUEST_TIMEOUT_MS, // without this an unresponsive server hangs the run forever
      headers: {
        'User-Agent':
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      },
    });

    const html: unknown = response.data;
    if (typeof html !== 'string') {
      throw new Error('Unexpected response body: expected HTML text');
    }

    const newData = extractRecords(html);
    if (newData.length === 0) {
      console.warn('No measurement rows found; the page layout may have changed.');
    }

    const mergedData = mergeRecords(loadExistingData(DATA_FILE), newData);

    fs.mkdirSync(path.dirname(DATA_FILE), { recursive: true });
    fs.writeFileSync(DATA_FILE, JSON.stringify(mergedData, null, 2), 'utf-8');

    console.log(`Scraped ${newData.length} records. Total records in DB: ${mergedData.length}`);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error scraping data:', message);
    process.exit(1);
  }
}

// scrape() handles its own errors, so the promise is intentionally not awaited.
void scrape();