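// Scraper that collects lake and river measurements from pvl.cz, enriches them
// with Open-Meteo weather data, and maintains per-lake JSON history files
// under public/data/.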
import * as fs from 'fs';
import * as path from 'path';
import * as cheerio from 'cheerio';
import axios from 'axios';
import https from 'https';
import { lakesConfig } from './lakesConfig';
/**
 * One measurement row as persisted in `public/data/<internalId>.json`.
 *
 * `timestamp`, `level` and `flow` are always present; the remaining fields are
 * only attached to some records and may be forward-filled when the history is
 * merged.
 */
interface DataRecord {
  /** ISO-8601 timestamp (as produced by `Date.prototype.toISOString`). */
  timestamp: string;
  /** Value read from the level ("hladina"/"stav") column of the measurement table. */
  level: number;
  /** Value read from the outflow/flow ("odtok"/"průtok") column of the measurement table. */
  flow: number;
  /** Current inflow ("Přítok"), when known. */
  inflow?: number;
  /** Current volume ("Objem"), when known. */
  volume?: number;
  /** Air temperature reported by the weather API; `null` when unavailable. */
  temperature?: number | null;
  /** Precipitation reported by the weather API; `null` when unavailable. */
  precipitation?: number | null;
  /** Raw text of the "Qn" column, when the table has one and the cell is non-empty. */
  qn?: string;
}
/**
 * Convert a `DD.MM.YYYY HH:MM` timestamp into an ISO-8601 string.
 *
 * The components are interpreted in the local time zone of the running
 * process (they are passed to the multi-argument `Date` constructor).
 *
 * @param dateStr - Text such as `"15.06.2024 12:30"`; surrounding whitespace
 *   and repeated/non-breaking whitespace between date and time are tolerated.
 * @returns The ISO string, or `null` when the input is missing, malformed, or
 *   names a calendar date/time that does not exist (e.g. `31.02.`, `10:70`).
 */
export function parseDateString(dateStr: string): string | null {
  if (typeof dateStr !== 'string') return null;

  const [datePart, timePart] = dateStr.trim().split(/\s+/);
  if (!datePart || !timePart) return null;

  const [day, month, year] = datePart.split('.');
  const [hours, minutes] = timePart.split(':');
  if (!day || !month || !year || !hours || !minutes) return null;

  const y = parseInt(year, 10);
  const m = parseInt(month, 10) - 1;
  const dDay = parseInt(day, 10);
  const h = parseInt(hours, 10);
  const min = parseInt(minutes, 10);

  // `Date` silently rolls out-of-range parts over (minute 70 -> next hour),
  // so the time of day has to be range-checked explicitly.
  if (!(h >= 0 && h <= 23) || !(min >= 0 && min <= 59)) return null;

  const d = new Date(y, m, dDay, h, min);
  if (isNaN(d.getTime())) return null;

  // Round-trip check rejects impossible calendar dates such as 31.02.
  if (d.getFullYear() !== y || d.getMonth() !== m || d.getDate() !== dDay) return null;

  return d.toISOString();
}
/**
 * Scrape the measurement page of one pvl.cz station and merge the result into
 * `public/data/<internalId>.json`.
 *
 * Steps: download the page, read current inflow/volume from the
 * "Aktuální hodnoty" table, read the time series from the measurement grid,
 * attach inflow/volume and Open-Meteo weather to the first scraped row, merge
 * with the stored history (keyed by timestamp), forward-fill gaps, and write
 * the file back.
 *
 * All failures are logged and swallowed so that one station cannot abort a
 * bulk run; in that case the data file is left untouched.
 *
 * @param lakeId - Full `lakesConfig` id used to look up the station type.
 * @param oid - `oid` query parameter of the pvl.cz page.
 * @param internalId - `id` query parameter; also the data file name.
 */
async function scrapeLake(lakeId: string, oid: string, internalId: string): Promise<void> {
  // Czech number formatting: whitespace as thousands separator, comma as decimal mark.
  const parseCzNumber = (raw: string): number =>
    parseFloat(raw.replace(/\s/g, '').replace(',', '.'));
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  const config = lakesConfig.find(l => l.id === lakeId);
  const isRiver = config?.type === 'river';
  const pageUrl = isRiver
    ? `https://www.pvl.cz/portal/sap/cz/pc/Mereni.aspx?oid=${oid}&id=${internalId}`
    : `https://www.pvl.cz/portal/nadrze/cz/pc/Mereni.aspx?oid=${oid}&id=${internalId}`;
  const DATA_FILE = path.resolve(`public/data/${internalId}.json`);

  try {
    // NOTE(review): TLS certificate verification is disabled for this request.
    // Presumably a workaround for the server's certificate chain — confirm it
    // is still required, since it exposes the scraper to tampered responses.
    const agent = new https.Agent({ rejectUnauthorized: false });
    const response = await axios.get(pageUrl, {
      httpsAgent: agent,
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
      }
    });

    const $ = cheerio.load(response.data);

    // --- Current values (inflow / volume) -------------------------------
    // Temperature and precipitation on this page are intentionally not read:
    // weather comes from Open-Meteo below.
    let currentInflow = 0;
    let currentVolume = 0;
    $('table').each((_, tbl) => {
      const text = $(tbl).text();
      if (!text.includes('Aktuální hodnoty') || !text.includes('Přítok')) return;

      $(tbl).find('tr').each((__, row) => {
        const cells = $(row).find('td');
        const label = cells.eq(0).text().trim();
        const value = parseCzNumber(cells.eq(1).text()) || 0;
        if (label.includes('Přítok')) currentInflow = value;
        if (label.includes('Objem')) currentVolume = value;
      });
    });

    // --- Time series -----------------------------------------------------
    // When several tables match, the last one wins.
    const dataTable = $('table')
      .filter((_, tbl) => {
        const id = ($(tbl).attr('id') || '').toLowerCase();
        return id.includes('datamereni24hgv') || id.includes('datamerenigv');
      })
      .last();

    const records: DataRecord[] = [];
    if (dataTable.length > 0) {
      // Defaults apply when the header does not name a column.
      let qnColIndex = -1;
      let flowColIndex = 2;
      let levelColIndex = 1;

      dataTable.find('tr').first().find('th, td').each((idx, cell) => {
        const headerText = $(cell).text().trim().toLowerCase();
        if (headerText.includes('qn')) {
          qnColIndex = idx;
        } else if (headerText.includes('hladina') || headerText.includes('stav')) {
          levelColIndex = idx;
        } else if (headerText.includes('odtok') || headerText.includes('průtok') || headerText.includes('prutok') || headerText.includes('flow')) {
          flowColIndex = idx;
        }
      });

      dataTable.find('tr').each((rowIdx, row) => {
        if (rowIdx === 0) return; // header row
        const cols = $(row).find('td');
        if (cols.length <= Math.max(levelColIndex, flowColIndex)) return;

        const timestamp = parseDateString(cols.eq(0).text().trim());
        if (!timestamp) return;

        const record: DataRecord = {
          timestamp,
          level: parseCzNumber(cols.eq(levelColIndex).text()) || 0,
          flow: parseCzNumber(cols.eq(flowColIndex).text()) || 0
        };
        const qn = qnColIndex !== -1 && cols.length > qnColIndex
          ? cols.eq(qnColIndex).text().trim()
          : '';
        if (qn) record.qn = qn;
        records.push(record);
      });
    }

    // --- Attach current values and weather to the first scraped row ------
    // (presumably the most recent measurement — depends on the page's row order)
    const latest = records[0];
    if (latest) {
      latest.inflow = currentInflow;
      latest.volume = currentVolume;

      const weatherConfig = lakesConfig.find(l => l.id.split('|')[0] === internalId);
      if (weatherConfig && weatherConfig.coords) {
        try {
          const lat = weatherConfig.coords[0];
          const lon = weatherConfig.coords[1];
          const url = `https://api.open-meteo.com/v1/forecast?latitude=${lat}&longitude=${lon}&current=temperature_2m,precipitation`;
          const weatherRes = await axios.get(url, { timeout: 5000 });
          if (weatherRes.data && weatherRes.data.current) {
            latest.temperature = weatherRes.data.current.temperature_2m;
            latest.precipitation = weatherRes.data.current.precipitation;
          }
          // Small delay to prevent API rate limits
          await new Promise(resolve => setTimeout(resolve, 200));
        } catch (err: unknown) {
          console.error(`Failed to fetch weather for ${internalId}:`, describeError(err));
        }
      }
    }

    // --- Merge with stored history ----------------------------------------
    let existingData: DataRecord[] = [];
    if (fs.existsSync(DATA_FILE)) {
      const parsed: unknown = JSON.parse(fs.readFileSync(DATA_FILE, 'utf-8'));
      if (!Array.isArray(parsed)) {
        // Refuse to overwrite a file whose content we do not understand.
        throw new Error(`${DATA_FILE} does not contain a JSON array`);
      }
      existingData = parsed;
    }

    const dataMap = new Map<string, DataRecord>();
    for (const item of existingData) dataMap.set(item.timestamp, item);
    for (const item of records) {
      const existing = dataMap.get(item.timestamp);
      if (!existing) {
        dataMap.set(item.timestamp, item);
        continue;
      }
      dataMap.set(item.timestamp, {
        ...existing,
        ...item,
        inflow: item.inflow !== undefined ? item.inflow : existing.inflow,
        volume: item.volume !== undefined ? item.volume : existing.volume,
        qn: item.qn !== undefined ? item.qn : existing.qn
      });
    }

    const mergedData = Array.from(dataMap.values()).sort(
      (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
    );

    // --- Forward-fill: carry the last known value into later records ------
    const carriedKeys = ['temperature', 'precipitation', 'inflow', 'volume'] as const;
    const lastKnown: Partial<Record<(typeof carriedKeys)[number], number>> = {};
    for (const item of mergedData) {
      for (const key of carriedKeys) {
        const value = item[key];
        if (value !== undefined && value !== null) {
          lastKnown[key] = value;
        } else {
          const carried = lastKnown[key];
          if (carried !== undefined) item[key] = carried;
        }
      }
    }

    fs.mkdirSync(path.dirname(DATA_FILE), { recursive: true });
    fs.writeFileSync(DATA_FILE, JSON.stringify(mergedData, null, 2), 'utf-8');

    console.log(`[${internalId}] Scraped ${records.length} records. DB total: ${mergedData.length}`);
  } catch (error: unknown) {
    console.error(`[${internalId}] Error scraping data:`, describeError(error));
  }
}
/**
 * Maintain a *simulated* measurement history for a non-Czech lake in
 * `public/data/<internalId>.json`.
 *
 * With no stored history, 7 days of 10-minute records (1008) are generated;
 * otherwise a single record for the current 10-minute slot is appended if it
 * does not exist yet. Flow values are synthetic (per-lake base flow with
 * sinusoidal and random variation); temperature and precipitation are derived
 * from the current Open-Meteo reading. The file is capped at the newest 1500
 * records.
 *
 * @param lakeConfig - Entry from `lakesConfig`; reads `id`, `coords`,
 *   `maxVolume`, `minLevel` and `maxLevel`.
 */
async function getOrSimulateInternationalLake(lakeConfig: any) {
  type StoredRecord = { timestamp: string; volume?: number | null; [key: string]: unknown };

  const TEN_MINUTES_MS = 10 * 60 * 1000;
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  const [internalId] = lakeConfig.id.split('|');
  const DATA_FILE = path.resolve(`public/data/${internalId}.json`);

  // Load stored history; an unreadable file is treated as "no history" and
  // will be regenerated, but the reason is logged rather than hidden.
  let existingData: StoredRecord[] = [];
  if (fs.existsSync(DATA_FILE)) {
    try {
      const parsed: unknown = JSON.parse(fs.readFileSync(DATA_FILE, 'utf-8'));
      if (Array.isArray(parsed)) {
        existingData = parsed;
      } else {
        console.warn(`[${internalId}] ${DATA_FILE} is not a JSON array; regenerating history.`);
      }
    } catch (e: unknown) {
      console.warn(`[${internalId}] Could not read ${DATA_FILE}; regenerating history:`, describeError(e));
    }
  }

  // Current 10-minute slot. Flooring on the epoch keeps the grid identical
  // regardless of the machine's time zone.
  const baseTimeMs = Math.floor(Date.now() / TEN_MINUTES_MS) * TEN_MINUTES_MS;

  // If no data, generate 7 days of 10-minute records (7 * 24 * 6 = 1008).
  const targetRecordsCount = existingData.length > 0 ? 1 : 1008;

  // Current weather from Open-Meteo; defaults are used when the call fails
  // or the response lacks numeric values.
  let currentTemp = 15;
  let currentPrecip = 0;
  try {
    const lat = lakeConfig.coords[0];
    const lon = lakeConfig.coords[1];
    const url = `https://api.open-meteo.com/v1/forecast?latitude=${lat}&longitude=${lon}&current=temperature_2m,precipitation`;
    const weatherRes = await axios.get(url, { timeout: 5000 });
    const current = weatherRes.data && weatherRes.data.current;
    if (current) {
      if (typeof current.temperature_2m === 'number') currentTemp = current.temperature_2m;
      if (typeof current.precipitation === 'number') currentPrecip = current.precipitation;
    }
  } catch (err: unknown) {
    console.error(`Failed to fetch weather for international lake ${internalId}:`, describeError(err));
  }

  // Per-lake base flow used as the centre of the simulated variation.
  const baseFlowById = new Map<string, number>([
    ['CN_THRE', 14300],
    ['BR_ITAI', 12000],
    ['US_HOOV', 360],
    ['US_GRACO', 3100],
    ['CA_DANJ', 1020],
    ['RU_SASA', 4000],
    ['CA_ROBB', 3400],
    ['RU_KRAS', 3000],
    ['CH_DIXE', 22]
  ]);
  const baseFlow = baseFlowById.get(internalId) ?? 100;

  // Volume is kept between 80 % and 95 % of capacity; level is interpolated
  // linearly across that band.
  const maxV = lakeConfig.maxVolume || 100;
  const minLimit = maxV * 0.80;
  const maxLimit = maxV * 0.95;
  const minL = lakeConfig.minLevel || 0;
  const maxL = lakeConfig.maxLevel || 100;

  // Start from the last stored volume when it is a usable number, otherwise
  // from 88 % of capacity.
  const lastStored = existingData.length > 0 ? existingData[existingData.length - 1] : undefined;
  const lastStoredVolume = lastStored ? lastStored.volume : undefined;
  let previousVolume =
    typeof lastStoredVolume === 'number' && Number.isFinite(lastStoredVolume)
      ? lastStoredVolume
      : maxV * 0.88;

  const knownTimestamps = new Set(existingData.map(r => r.timestamp));
  const recordsToGenerate: StoredRecord[] = [];

  for (let i = targetRecordsCount - 1; i >= 0; i--) {
    const recTime = new Date(baseTimeMs - i * TEN_MINUTES_MS);
    const ts = recTime.toISOString();

    // Skip slots that are already stored.
    if (knownTimestamps.has(ts)) continue;

    const hr = recTime.getUTCHours();
    const day = recTime.getUTCDate();
    const sineFactor = Math.sin((hr / 24) * 2 * Math.PI) * 0.1;
    const noise = (Math.sin(day / 7) * 0.05) + (Math.random() * 0.02 - 0.01);

    const inflow = baseFlow * (1.0 + sineFactor + noise);
    const demandFactor = (Math.sin(((hr - 6) / 24) * 4 * Math.PI) * 0.15) + (Math.random() * 0.01 - 0.005);
    const outflow = baseFlow * (1.0 + demandFactor);

    // Net flow over one 10-minute step (600 s), scaled by 1e6 into volume units.
    const deltaVol = ((inflow - outflow) * 600) / 1000000;
    let newVolume = previousVolume + deltaVol;
    if (newVolume < minLimit) newVolume = minLimit + Math.random() * (maxV * 0.01);
    if (newVolume > maxLimit) newVolume = maxLimit - Math.random() * (maxV * 0.01);

    const level = minL + ((newVolume - minLimit) / (maxLimit - minLimit)) * (maxL - minL);
    const roundedVolume = parseFloat(newVolume.toFixed(2));

    recordsToGenerate.push({
      timestamp: ts,
      level: parseFloat(level.toFixed(2)),
      flow: parseFloat(outflow.toFixed(1)),
      inflow: parseFloat(inflow.toFixed(1)),
      volume: roundedVolume,
      temperature: parseFloat((currentTemp + Math.sin((hr / 24) * 2 * Math.PI) * 3 + (Math.random() * 2 - 1)).toFixed(1)),
      precipitation: currentPrecip > 0 ? parseFloat((currentPrecip * Math.random()).toFixed(1)) : 0
    });

    // The next step continues from the stored (rounded) value.
    previousVolume = roundedVolume;
  }

  const mergedData = [...existingData, ...recordsToGenerate].sort(
    (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
  );

  // Keep only the newest 1500 records.
  const finalData = mergedData.slice(-1500);

  fs.mkdirSync(path.dirname(DATA_FILE), { recursive: true });
  fs.writeFileSync(DATA_FILE, JSON.stringify(finalData, null, 2), 'utf-8');
  console.log(`[${internalId}] Generated/Updated international data. Total: ${finalData.length}`);
}
/**
 * Process every entry of `lakesConfig` sequentially: entries with a
 * `country` other than `'CZ'` get simulated data, all others are scraped
 * from pvl.cz. A 500 ms pause follows each entry to avoid hammering the
 * servers.
 *
 * A failure while processing one lake is logged and does not stop the
 * remaining lakes from being processed.
 */
async function runScraper() {
  console.log(`Starting bulk scraper for ${lakesConfig.length} lakes...`);

  for (const lake of lakesConfig) {
    // Config ids have the form "<internalId>|<oid>".
    const [internalId, oid] = lake.id.split('|');

    try {
      if (lake.country && lake.country !== 'CZ') {
        await getOrSimulateInternationalLake(lake);
      } else {
        await scrapeLake(lake.id, oid, internalId);
      }
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      console.error(`[${internalId}] Unexpected failure, continuing with next lake:`, message);
    }

    // Add small delay to not hammer the server
    await new Promise(resolve => setTimeout(resolve, 500));
  }

  console.log('Bulk scraping finished.');
}
// Script entry point: run the bulk scrape and report any unexpected failure
// through a non-zero exit code instead of an unhandled promise rejection.
runScraper().catch((err: unknown) => {
  console.error('Bulk scraper failed:', err instanceof Error ? err.message : err);
  process.exitCode = 1;
});