import * as fs from 'fs';
import * as path from 'path';
import * as cheerio from 'cheerio';
import axios from 'axios';
import https from 'https';
import { lakesConfig } from './lakesConfig';

/** One measurement row as persisted in `public/data/<internalId>.json`. */
interface DataRecord {
  timestamp: string;
  level: number;
  flow: number;
  inflow?: number;
  volume?: number;
  temperature?: number | null;
  precipitation?: number | null;
  qn?: string;
}

/** Current weather as extracted from an Open-Meteo response; `null` marks a missing field. */
interface CurrentWeather {
  temperature: number | null;
  precipitation: number | null;
}

/** The `$` function returned by `cheerio.load` (derived so no cheerio type name is hard-coded). */
type CheerioRoot = ReturnType<typeof cheerio.load>;

const USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
const PVL_REQUEST_TIMEOUT_MS = 30000;
const WEATHER_TIMEOUT_MS = 5000;

// NOTE(review): certificate verification is disabled for the pvl.cz requests, exactly as in the
// original code. Presumably this works around a broken certificate chain on that host; confirm it
// is still required, because it leaves the request open to man-in-the-middle tampering.
const insecurePvlAgent = new https.Agent({ rejectUnauthorized: false });

/** Base flow used by the simulation for known international lakes, keyed by internal id. */
const BASE_FLOW_BY_LAKE = new Map<string, number>([
  ['CN_THRE', 14300],
  ['BR_ITAI', 12000],
  ['US_HOOV', 360],
  ['US_GRACO', 3100],
  ['CA_DANJ', 1020],
  ['RU_SASA', 4000],
  ['CA_ROBB', 3400],
  ['RU_KRAS', 3000],
  ['CH_DIXE', 22],
]);
const DEFAULT_BASE_FLOW = 100;

/** Fields that inherit the previous record's value when a record lacks them. */
const FORWARD_FILLED_FIELDS = ['temperature', 'precipitation', 'inflow', 'volume'] as const;

/** Resolves after `ms` milliseconds. */
const sleep = (ms: number): Promise<void> => new Promise(resolve => setTimeout(resolve, ms));

/** Extracts a printable message from a value caught in a `catch` clause. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/** Absolute path of the JSON data file for the given internal lake id. */
function dataFilePath(internalId: string): string {
  return path.resolve(`public/data/${internalId}.json`);
}

/**
 * Reads the stored records of a data file.
 *
 * @returns the parsed array, or an empty array when the file does not exist.
 * @throws when the file exists but is not valid JSON or does not contain an array.
 */
function readDataFile(file: string): DataRecord[] {
  if (!fs.existsSync(file)) return [];
  const parsed: unknown = JSON.parse(fs.readFileSync(file, 'utf-8'));
  if (!Array.isArray(parsed)) {
    throw new Error(`Expected a JSON array in ${file}`);
  }
  return parsed as DataRecord[];
}

/** Writes the records as pretty-printed JSON, creating the target directory if needed. */
function writeDataFile(file: string, data: DataRecord[]): void {
  fs.mkdirSync(path.dirname(file), { recursive: true });
  fs.writeFileSync(file, JSON.stringify(data, null, 2), 'utf-8');
}

/** Ascending comparison of two records by their timestamp. */
function byTimestamp(a: DataRecord, b: DataRecord): number {
  return new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime();
}

/**
 * Fetches current temperature and precipitation from Open-Meteo.
 *
 * @param coords latitude at index 0, longitude at index 1.
 * @returns the current values, or `null` when the response has no `current` section.
 * @throws on network errors, timeouts, or when `coords` is not indexable.
 */
async function fetchCurrentWeather(coords: ArrayLike<number | string>): Promise<CurrentWeather | null> {
  const lat = coords[0];
  const lon = coords[1];
  // The parameter must be spelled "&current="; the previous "¤t=" was an HTML-entity
  // corruption of it, which meant the API never returned the `current` section.
  const url = `https://api.open-meteo.com/v1/forecast?latitude=${lat}&longitude=${lon}&current=temperature_2m,precipitation`;
  const response = await axios.get(url, { timeout: WEATHER_TIMEOUT_MS });
  const current = response.data?.current;
  if (!current) return null;
  return {
    temperature: typeof current.temperature_2m === 'number' ? current.temperature_2m : null,
    precipitation: typeof current.precipitation === 'number' ? current.precipitation : null,
  };
}

/**
 * Converts a `DD.MM.YYYY HH:MM` string to an ISO-8601 timestamp.
 *
 * The date/time is interpreted in the local timezone of the running process.
 * NOTE(review): the source timestamps are presumably Czech local time; confirm that the process
 * runs with a matching TZ.
 *
 * @returns the ISO string, or `null` when the input is empty, malformed, or names a
 *   non-existent date or an out-of-range time.
 */
export function parseDateString(dateStr: string): string | null {
  if (!dateStr) return null;

  // Split on any whitespace run so a non-breaking space or a doubled space still parses.
  const [datePart, timePart] = dateStr.trim().split(/\s+/);
  if (!datePart || !timePart) return null;

  const [day, month, year] = datePart.split('.');
  const [hours, minutes] = timePart.split(':');
  if (!day || !month || !year || !hours || !minutes) return null;

  const y = parseInt(year, 10);
  const m = parseInt(month, 10) - 1;
  const dDay = parseInt(day, 10);
  const h = parseInt(hours, 10);
  const min = parseInt(minutes, 10);

  // Date silently rolls out-of-range values over (10:75 becomes 11:15), so reject them up front.
  // NaN fails both range checks as well.
  if (!(h >= 0 && h <= 23) || !(min >= 0 && min <= 59)) return null;

  const d = new Date(y, m, dDay, h, min);
  if (isNaN(d.getTime())) return null;
  // Detects rolled-over dates such as 31.02. and two-digit years (mapped to 19xx by Date).
  if (d.getFullYear() !== y || d.getMonth() !== m || d.getDate() !== dDay) return null;
  return d.toISOString();
}

/**
 * Reads inflow and volume from the "Aktuální hodnoty" table of a PVL page.
 * Values that are absent or unparsable are reported as 0.
 */
function parseCurrentValues($: CheerioRoot): { inflow: number; volume: number } {
  let inflow = 0;
  let volume = 0;
  $('table').each((_i, tbl) => {
    const text = $(tbl).text();
    if (!text.includes('Aktuální hodnoty') || !text.includes('Přítok')) return;
    $(tbl)
      .find('tr')
      .each((_j, row) => {
        const cells = $(row).find('td');
        const label = cells.eq(0).text().trim();
        // Strip whitespace used as thousands separator and switch to a decimal point.
        const value = parseFloat(cells.eq(1).text().trim().replace(/\s/g, '').replace(',', '.')) || 0;
        if (label.includes('Přítok')) inflow = value;
        if (label.includes('Objem')) volume = value;
      });
  });
  return { inflow, volume };
}

/**
 * Extracts the measurement rows from the data grid of a PVL page.
 * Rows are returned in page order; rows whose date cannot be parsed are skipped.
 */
function parseMeasurementTable($: CheerioRoot): DataRecord[] {
  // When several tables match, the last one wins.
  const dataTable = $('table')
    .filter((_i, tbl) => {
      const id = ($(tbl).attr('id') || '').toLowerCase();
      return id.includes('datamereni24hgv') || id.includes('datamerenigv');
    })
    .last();

  const records: DataRecord[] = [];
  if (dataTable.length === 0) return records;

  // Defaults apply when the header row does not name the column.
  let qnColIndex = -1;
  let levelColIndex = 1;
  let flowColIndex = 2;
  dataTable
    .find('tr')
    .first()
    .find('th, td')
    .each((idx, cell) => {
      const headerText = $(cell).text().trim().toLowerCase();
      if (headerText.includes('qn')) {
        qnColIndex = idx;
      } else if (headerText.includes('hladina') || headerText.includes('stav')) {
        levelColIndex = idx;
      } else if (
        headerText.includes('odtok') ||
        headerText.includes('průtok') ||
        headerText.includes('prutok') ||
        headerText.includes('flow')
      ) {
        flowColIndex = idx;
      }
    });

  dataTable.find('tr').each((i, row) => {
    if (i === 0) return; // skip header
    const cols = $(row).find('td');
    if (cols.length <= Math.max(levelColIndex, flowColIndex)) return;

    const timestamp = parseDateString(cols.eq(0).text().trim());
    if (!timestamp) return;

    const levelStr = cols.eq(levelColIndex).text().trim().replace(',', '.');
    const flowStr = cols.eq(flowColIndex).text().trim().replace(',', '.');
    const record: DataRecord = {
      timestamp,
      level: parseFloat(levelStr) || 0,
      flow: parseFloat(flowStr) || 0,
    };

    const qn = qnColIndex !== -1 && cols.length > qnColIndex ? cols.eq(qnColIndex).text().trim() : '';
    if (qn) record.qn = qn;

    records.push(record);
  });

  return records;
}

/**
 * Carries the last known temperature, precipitation, inflow and volume forward onto later
 * records that lack them. Mutates the records in place; expects them sorted by timestamp.
 */
function forwardFillMissing(sorted: DataRecord[]): void {
  const lastKnown: { [K in (typeof FORWARD_FILLED_FIELDS)[number]]?: number } = {};
  for (const record of sorted) {
    for (const field of FORWARD_FILLED_FIELDS) {
      const value = record[field];
      if (value !== undefined && value !== null) {
        lastKnown[field] = value;
        continue;
      }
      const known = lastKnown[field];
      if (known !== undefined) record[field] = known;
    }
  }
}

/**
 * Scrapes the measurement page of one Czech lake or river from pvl.cz, merges the rows into the
 * lake's JSON data file and writes it back. Failures are logged, never thrown.
 *
 * @param lakeId full id from `lakesConfig`; used to look up whether the entry is a river.
 * @param oid `oid` query parameter of the PVL page.
 * @param internalId `id` query parameter of the PVL page; also names the data file.
 */
async function scrapeLake(lakeId: string, oid: string, internalId: string): Promise<void> {
  const config = lakesConfig.find(l => l.id === lakeId);
  const isRiver = config?.type === 'river';
  const portal = isRiver ? 'sap' : 'nadrze';
  const url = `https://www.pvl.cz/portal/${portal}/cz/pc/Mereni.aspx?oid=${oid}&id=${internalId}`;
  const dataFile = dataFilePath(internalId);

  try {
    const response = await axios.get(url, {
      httpsAgent: insecurePvlAgent,
      timeout: PVL_REQUEST_TIMEOUT_MS, // without a timeout a stalled server would hang the whole run
      headers: { 'User-Agent': USER_AGENT },
    });
    const $ = cheerio.load(response.data);

    const current = parseCurrentValues($);
    const records = parseMeasurementTable($);

    // The "current" values are attached to the first row of the table.
    const latest = records[0];
    if (latest) {
      latest.inflow = current.inflow;
      latest.volume = current.volume;

      // Weather comes from Open-Meteo only; the values shown on the PVL page are not used.
      const weatherConfig = config ?? lakesConfig.find(l => l.id.split('|')[0] === internalId);
      if (weatherConfig && weatherConfig.coords) {
        try {
          const weather = await fetchCurrentWeather(weatherConfig.coords);
          if (weather) {
            if (weather.temperature !== null) latest.temperature = weather.temperature;
            if (weather.precipitation !== null) latest.precipitation = weather.precipitation;
          }
          // Small delay to prevent API rate limits
          await sleep(200);
        } catch (err: unknown) {
          console.error(`Failed to fetch weather for ${internalId}:`, errorMessage(err));
        }
      }
    }

    // A corrupt data file makes readDataFile throw; the outer catch then aborts this lake and
    // leaves the file untouched instead of overwriting collected history.
    const merged = new Map<string, DataRecord>();
    for (const stored of readDataFile(dataFile)) {
      merged.set(stored.timestamp, stored);
    }
    for (const record of records) {
      const stored = merged.get(record.timestamp);
      // Fresh values win; fields the fresh record does not carry keep their stored value.
      merged.set(record.timestamp, stored ? { ...stored, ...record } : record);
    }

    const mergedData = Array.from(merged.values()).sort(byTimestamp);

    // Propagate previous values if missing (user requested)
    forwardFillMissing(mergedData);

    writeDataFile(dataFile, mergedData);
    console.log(`[${internalId}] Scraped ${records.length} records. \nDB total: ${mergedData.length}`);
  } catch (error: unknown) {
    console.error(`[${internalId}] Error scraping data:`, errorMessage(error));
  }
}

/**
 * Appends simulated 10-minute records for a non-Czech lake to its JSON data file.
 *
 * With no stored data, 7 days of history (7 * 24 * 6 = 1008 records) are generated; otherwise
 * only the record for the current 10-minute slot is added. The file is capped at the newest
 * 1500 records.
 *
 * @param lakeConfig entry from `lakesConfig`; reads `id`, `coords`, `maxVolume`, `minLevel`
 *   and `maxLevel`.
 * @throws when the data file cannot be written.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the element type of lakesConfig is declared in another module
async function getOrSimulateInternationalLake(lakeConfig: any): Promise<void> {
  const [internalId] = lakeConfig.id.split('|');
  const dataFile = dataFilePath(internalId);

  let existingData: DataRecord[] = [];
  try {
    existingData = readDataFile(dataFile);
  } catch (err: unknown) {
    // The stored data is simulated, so an unreadable file is regenerated, but not silently.
    console.error(`[${internalId}] Ignoring unreadable data file ${dataFile}:`, errorMessage(err));
  }

  // Determine current timestamp (rounded down to 10 minutes)
  const now = new Date();
  now.setSeconds(0);
  now.setMilliseconds(0);
  now.setMinutes(Math.floor(now.getMinutes() / 10) * 10);
  const baseTimeMs = now.getTime();

  const targetRecordsCount = existingData.length > 0 ? 1 : 1008;

  // The current Open-Meteo reading anchors the simulated weather; these defaults apply when the
  // request fails or a field is missing.
  let currentTemp = 15;
  let currentPrecip = 0;
  try {
    const weather = await fetchCurrentWeather(lakeConfig.coords);
    if (weather) {
      if (weather.temperature !== null) currentTemp = weather.temperature;
      if (weather.precipitation !== null) currentPrecip = weather.precipitation;
    }
  } catch (err: unknown) {
    console.error(`Failed to fetch weather for international lake ${internalId}:`, errorMessage(err));
  }

  const baseFlow = BASE_FLOW_BY_LAKE.get(internalId) ?? DEFAULT_BASE_FLOW;

  // The simulated volume is kept between 80% and 95% of the maximum volume.
  const maxV = lakeConfig.maxVolume || 100;
  const minLimit = maxV * 0.80;
  const maxLimit = maxV * 0.95;
  const minL = lakeConfig.minLevel || 0;
  const maxL = lakeConfig.maxLevel || 100;

  // Continue from the last stored volume; start at 88% full when there is none (or when it is
  // not a usable number, which would otherwise turn every generated volume and level into NaN).
  const storedVolume = existingData[existingData.length - 1]?.volume;
  let previousVolume =
    typeof storedVolume === 'number' && Number.isFinite(storedVolume) ? storedVolume : maxV * 0.88;

  const knownTimestamps = new Set(existingData.map(r => r.timestamp));
  const recordsToGenerate: DataRecord[] = [];

  for (let i = targetRecordsCount - 1; i >= 0; i--) {
    const recTime = new Date(baseTimeMs - i * 10 * 60 * 1000);
    const ts = recTime.toISOString();
    if (knownTimestamps.has(ts)) continue;

    const hr = recTime.getUTCHours();
    const day = recTime.getUTCDate();
    const dailyWave = Math.sin((hr / 24) * 2 * Math.PI);

    // Diurnal flow variation
    const noise = Math.sin(day / 7) * 0.05 + (Math.random() * 0.02 - 0.01);
    const inflow = baseFlow * (1.0 + dailyWave * 0.1 + noise);

    const demandFactor = Math.sin(((hr - 6) / 24) * 4 * Math.PI) * 0.15 + (Math.random() * 0.01 - 0.005);
    const outflow = baseFlow * (1.0 + demandFactor);

    // Net flow over the 600 seconds of one step, scaled by 1e6 into the volume unit.
    const deltaVol = ((inflow - outflow) * 600) / 1000000;
    let newVolume = previousVolume + deltaVol;
    if (newVolume < minLimit) newVolume = minLimit + Math.random() * (maxV * 0.01);
    if (newVolume > maxLimit) newVolume = maxLimit - Math.random() * (maxV * 0.01);

    // Level calculation interpolated linearly across the allowed volume band
    const level = minL + ((newVolume - minLimit) / (maxLimit - minLimit)) * (maxL - minL);

    const storedNewVolume = parseFloat(newVolume.toFixed(2));
    recordsToGenerate.push({
      timestamp: ts,
      level: parseFloat(level.toFixed(2)),
      flow: parseFloat(outflow.toFixed(1)),
      inflow: parseFloat(inflow.toFixed(1)),
      volume: storedNewVolume,
      temperature: parseFloat((currentTemp + dailyWave * 3 + (Math.random() * 2 - 1)).toFixed(1)),
      precipitation: currentPrecip > 0 ? parseFloat((currentPrecip * Math.random()).toFixed(1)) : 0,
    });
    // The next step continues from the rounded value that is actually stored.
    previousVolume = storedNewVolume;
  }

  const finalData = [...existingData, ...recordsToGenerate].sort(byTimestamp).slice(-1500);

  writeDataFile(dataFile, finalData);
  console.log(`[${internalId}] Generated/Updated international data. Total: ${finalData.length}`);
}

/**
 * Processes every entry of `lakesConfig` in order: entries with a country other than 'CZ' get
 * simulated data, all others are scraped from pvl.cz. A failure of one lake is logged and does
 * not stop the remaining lakes.
 */
async function runScraper(): Promise<void> {
  console.log(`Starting bulk scraper for ${lakesConfig.length} lakes...`);

  // Deliberately sequential: the requests are spaced out to stay gentle on the servers.
  for (const lake of lakesConfig) {
    const [internalId, oid] = lake.id.split('|');
    try {
      if (lake.country && lake.country !== 'CZ') {
        await getOrSimulateInternationalLake(lake);
      } else {
        await scrapeLake(lake.id, oid, internalId);
      }
    } catch (err: unknown) {
      console.error(`[${internalId}] Unexpected failure:`, errorMessage(err));
    }

    // Add small delay to not hammer the server
    await sleep(500);
  }

  console.log('Bulk scraping finished.');
}

runScraper().catch((err: unknown) => {
  console.error('Bulk scraper aborted:', errorMessage(err));
  // Keep a failing exit status, as an unhandled rejection would have produced.
  process.exitCode = 1;
});