import * as fs from 'fs';
import * as path from 'path';
import * as cheerio from 'cheerio';
import axios from 'axios';
import https from 'https';
import { lakesConfig } from './lakesConfig';

/** One measurement row as persisted in `public/data/<internalId>.json`. */
interface DataRecord {
  /** ISO 8601 string produced by `parseDateString`. */
  timestamp: string;
  /** Value of the second column of the history table. */
  level: number;
  /** Value of the third (or, when that is blank, fourth) column of the history table. */
  flow: number;
  inflow?: number;
  volume?: number;
  temperature?: number | null;
  precipitation?: number | null;
}

/** Values read from the "Aktuální hodnoty" (current values) table of the page. */
interface CurrentValues {
  inflow: number;
  volume: number;
  temperature: number | null;
  precipitation: number | null;
}

/** Whatever `cheerio.load` returns; derived so it works across cheerio typings. */
type CheerioRoot = ReturnType<typeof cheerio.load>;

/** Abort a request that has not completed after this many milliseconds. */
const REQUEST_TIMEOUT_MS = 30000;

// NOTE(review): certificate verification is disabled for the portal, which
// exposes the scraper to man-in-the-middle tampering. Kept as in the original
// behaviour; re-enable verification if the portal's certificate chain validates.
const portalAgent = new https.Agent({ rejectUnauthorized: false });

/**
 * Parses a `DD.MM.YYYY HH:MM` date (interpreted in the process's local time
 * zone) into an ISO 8601 string.
 *
 * @param dateStr Raw cell text; date and time may be separated by any
 *   whitespace, including non-breaking spaces. Text after the time is ignored.
 * @returns The ISO string, or `null` when the input is not a valid date/time.
 */
export function parseDateString(dateStr: string): string | null {
  // Callers may hand over arbitrary scraped values; never throw on them.
  if (typeof dateStr !== 'string') return null;

  const [datePart, timePart] = dateStr.trim().split(/\s+/);
  if (!datePart || !timePart) return null;

  const [day, month, year] = datePart.split('.');
  const [hours, minutes] = timePart.split(':');
  if (!day || !month || !year || !hours || !minutes) return null;

  const y = parseInt(year, 10);
  const m = parseInt(month, 10) - 1;
  const dDay = parseInt(day, 10);
  const h = parseInt(hours, 10);
  const min = parseInt(minutes, 10);
  if ([y, m, dDay, h, min].some((part) => Number.isNaN(part))) return null;

  // Date silently rolls out-of-range times over (12:75 -> 13:15); reject them.
  if (h < 0 || h > 23 || min < 0 || min > 59) return null;

  const d = new Date(y, m, dDay, h, min);
  if (isNaN(d.getTime())) return null;

  // Reject dates that rolled over (e.g. 31.02.) and two-digit years, which
  // the Date constructor maps to 19xx.
  if (d.getFullYear() !== y || d.getMonth() !== m || d.getDate() !== dDay) return null;

  return d.toISOString();
}

/**
 * Parses a number written with a decimal comma and optional whitespace
 * between digits. Returns `NaN` when the text does not start with a number.
 */
function parseLocalizedNumber(raw: string): number {
  return parseFloat(raw.trim().replace(/\s/g, '').replace(',', '.'));
}

/**
 * Reads inflow ("Přítok"), volume ("Objem"), temperature ("Teplota") and
 * precipitation ("Srážky") from every table whose text mentions both
 * "Aktuální hodnoty" and "Přítok". Later matching rows override earlier ones.
 */
function extractCurrentValues($: CheerioRoot): CurrentValues {
  const current: CurrentValues = {
    inflow: 0,
    volume: 0,
    temperature: null,
    precipitation: null,
  };

  $('table').each((_, tbl) => {
    const text = $(tbl).text();
    if (!text.includes('Aktuální hodnoty') || !text.includes('Přítok')) return;

    $(tbl)
      .find('tr')
      .each((__, row) => {
        const cells = $(row).find('td');
        const label = cells.eq(0).text().trim();
        const value = parseLocalizedNumber(cells.eq(1).text());

        // Inflow and volume fall back to 0 when unparsable; temperature and
        // precipitation are only taken when they parse.
        if (label.includes('Přítok')) current.inflow = value || 0;
        if (label.includes('Objem')) current.volume = value || 0;
        if (label.includes('Teplota') && !Number.isNaN(value)) current.temperature = value;
        if (label.includes('Srážky') && !Number.isNaN(value)) current.precipitation = value;
      });
  });

  return current;
}

/**
 * Extracts the history rows from the last table whose text mentions both
 * "Datum" and "Odtok". Rows with fewer than three cells or an unparsable
 * date are skipped. Only `timestamp`, `level` and `flow` are populated.
 */
function extractRecords($: CheerioRoot): DataRecord[] {
  const dataTable = $('table')
    .filter((_, tbl) => {
      const text = $(tbl).text();
      return text.includes('Datum') && text.includes('Odtok');
    })
    .last();

  const records: DataRecord[] = [];
  dataTable.find('tr').each((index, row) => {
    if (index === 0) return; // skip header

    const cells = $(row).find('td');
    if (cells.length < 3) return;

    const timestamp = parseDateString(cells.eq(0).text().trim());
    if (timestamp === null) return;

    // When the third cell is blank the value is taken from the fourth cell.
    let flowText = cells.eq(2).text().trim();
    if (flowText === '' && cells.length >= 4) {
      flowText = cells.eq(3).text().trim();
    }

    records.push({
      timestamp,
      level: parseLocalizedNumber(cells.eq(1).text()) || 0,
      flow: parseLocalizedNumber(flowText) || 0,
    });
  });

  return records;
}

/** Narrowing check used to validate entries read back from the data file. */
function isDataRecord(value: unknown): value is DataRecord {
  return (
    typeof value === 'object' &&
    value !== null &&
    typeof (value as { timestamp?: unknown }).timestamp === 'string'
  );
}

/**
 * Loads previously stored records, or an empty list when the file is absent.
 *
 * @throws When the file is not valid JSON or is not an array of records, so
 *   that the caller aborts instead of overwriting data it could not read.
 */
function loadExistingData(file: string): DataRecord[] {
  if (!fs.existsSync(file)) return [];

  const parsed: unknown = JSON.parse(fs.readFileSync(file, 'utf-8'));
  if (!Array.isArray(parsed) || !parsed.every(isDataRecord)) {
    throw new Error(`Unexpected content in ${file}; expected an array of records`);
  }
  return parsed;
}

/**
 * Merges scraped rows into the stored history, keyed by timestamp, and
 * returns the result sorted by ascending time.
 *
 * Level and flow always come from the scrape. Inflow, volume, temperature and
 * precipitation come from the scrape when it supplied them and are otherwise
 * preserved from the stored record, so values captured on an earlier run are
 * not clobbered once that row is no longer the one carrying current values.
 */
function mergeRecords(existing: DataRecord[], scraped: DataRecord[]): DataRecord[] {
  const byTimestamp = new Map<string, DataRecord>();
  for (const item of existing) {
    byTimestamp.set(item.timestamp, item);
  }

  for (const row of scraped) {
    const previous = byTimestamp.get(row.timestamp);
    const merged: DataRecord = {
      timestamp: row.timestamp,
      level: row.level,
      flow: row.flow,
      inflow: row.inflow ?? previous?.inflow ?? 0,
      volume: row.volume ?? previous?.volume ?? 0,
    };

    const temperature = row.temperature ?? previous?.temperature ?? null;
    if (temperature !== null) merged.temperature = temperature;

    const precipitation = row.precipitation ?? previous?.precipitation ?? null;
    if (precipitation !== null) merged.precipitation = precipitation;

    byTimestamp.set(row.timestamp, merged);
  }

  return Array.from(byTimestamp.values()).sort(
    (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime(),
  );
}

/**
 * Propagates the previous known temperature and precipitation into records
 * that lack them (user requested). Expects `data` in ascending time order and
 * mutates it in place.
 */
function forwardFill(data: DataRecord[]): void {
  let lastKnownTemp: number | null = null;
  let lastKnownPrecip: number | null = null;

  for (const item of data) {
    if (item.temperature !== undefined && item.temperature !== null) {
      lastKnownTemp = item.temperature;
    } else if (lastKnownTemp !== null) {
      item.temperature = lastKnownTemp;
    }

    if (item.precipitation !== undefined && item.precipitation !== null) {
      lastKnownPrecip = item.precipitation;
    } else if (lastKnownPrecip !== null) {
      item.precipitation = lastKnownPrecip;
    }
  }
}

/**
 * Downloads the measurement page for one lake, merges its rows into
 * `public/data/<internalId>.json` and rewrites that file.
 *
 * Errors are logged and swallowed so that one failing lake does not stop a
 * bulk run; on error the data file is left untouched.
 *
 * @param lakeId Full configured id; currently unused, kept for call compatibility.
 * @param oid Value of the `oid` query parameter.
 * @param internalId Value of the `id` query parameter and data file base name.
 */
async function scrapeLake(lakeId: string, oid: string, internalId: string): Promise<void> {
  const url =
    'https://www.pvl.cz/portal/nadrze/cz/pc/Mereni.aspx' +
    `?oid=${encodeURIComponent(oid)}&id=${encodeURIComponent(internalId)}`;
  const DATA_FILE = path.resolve(`public/data/${internalId}.json`);

  try {
    const response = await axios.get<string>(url, {
      httpsAgent: portalAgent,
      timeout: REQUEST_TIMEOUT_MS, // a stalled request must not hang the whole run
      headers: {
        'User-Agent':
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      },
    });

    const $ = cheerio.load(response.data);
    const current = extractCurrentValues($);
    const records = extractRecords($);

    // The first data row is treated as the latest measurement and receives the
    // current values. NOTE(review): assumes the portal lists newest first — confirm.
    const latest = records[0];
    if (latest !== undefined) {
      latest.inflow = current.inflow;
      latest.volume = current.volume;
      if (current.temperature !== null) latest.temperature = current.temperature;
      if (current.precipitation !== null) latest.precipitation = current.precipitation;
    }

    const mergedData = mergeRecords(loadExistingData(DATA_FILE), records);
    forwardFill(mergedData);

    fs.mkdirSync(path.dirname(DATA_FILE), { recursive: true });
    fs.writeFileSync(DATA_FILE, JSON.stringify(mergedData, null, 2), 'utf-8');

    console.log(`[${internalId}] Scraped ${records.length} records. DB total: ${mergedData.length}`);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error(`[${internalId}] Error scraping data:`, message);
  }
}

/** Scrapes every configured lake sequentially, pausing briefly between requests. */
async function runScraper(): Promise<void> {
  console.log(`Starting bulk scraper for ${lakesConfig.length} lakes...`);

  for (const lake of lakesConfig) {
    // ID format: VLL1|1 -> internalId=VLL1, oid=1
    const [internalId, oid] = lake.id.split('|');
    if (!internalId || !oid) {
      // Without both parts the request would be sent with "oid=undefined".
      console.error(`[${lake.id}] Skipping lake: id is not in "<internalId>|<oid>" format`);
      continue;
    }

    await scrapeLake(lake.id, oid, internalId);

    // Add small delay to not hammer the server
    await new Promise((resolve) => setTimeout(resolve, 500));
  }

  console.log('Bulk scraping finished.');
}

runScraper().catch((error: unknown) => {
  console.error('Bulk scraping failed:', error instanceof Error ? error.message : String(error));
  process.exitCode = 1;
});