epg/sites/tvhebdo.com/tvhebdo.com.config.js

100 lines
3.1 KiB
JavaScript

const cheerio = require('cheerio')
const axios = require('axios')
const dayjs = require('dayjs')
const utc = require('dayjs/plugin/utc')
const timezone = require('dayjs/plugin/timezone')
const customParseFormat = require('dayjs/plugin/customParseFormat')
// Register the dayjs plugins used by parseStart() below: `dayjs.tz(...)` is
// provided by the timezone plugin (which dayjs documents as requiring utc),
// and passing an explicit format string relies on customParseFormat.
dayjs.extend(utc)
dayjs.extend(timezone)
dayjs.extend(customParseFormat)
module.exports = {
site: 'tvhebdo.com',
days: 2,
url: function ({ channel, date }) {
return `https://www.tvhebdo.com/horaire-tele/${channel.site_id}/date/${date.format(
'YYYY-MM-DD'
)}`
},
parser: function ({ content, date }) {
let programs = []
const items = parseItems(content)
items.forEach(item => {
const prev = programs[programs.length - 1]
const $item = cheerio.load(item)
let start = parseStart($item, date)
if (prev) {
if (start.isBefore(prev.start)) {
start = start.add(1, 'd')
}
prev.stop = start
}
let stop = start.add(30, 'm')
programs.push({
title: parseTitle($item),
start,
stop
})
})
return programs
},
async channels() {
let items = []
const offsets = [
0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300, 320, 340, 360
]
for (let offset of offsets) {
const url = `https://www.tvhebdo.com/horaire/gr/offset/${offset}/gr_id/0/date/2022-05-11/time/12:00:00`
console.log(url)
const html = await axios
.get(url, {
headers: {
Cookie:
'distributeur=8004264; __utmz=222163677.1652094266.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _gcl_au=1.1.656635701.1652094273; tvh=3c2kaml9u14m83v91bg4dqgaf3; __utmc=222163677; IR_gbd=tvhebdo.com; IR_MPI=cf76b363-cf87-11ec-93f5-13daf79f8f76%7C1652367602625; __utma=222163677.2064368965.1652094266.1652281202.1652281479.3; __utmt=1; IR_MPS=1652284935955%7C1652284314367; _uetsid=0d8e2e60d13b11ec850db551304ae9e7; _uetvid=80456fa0b26e11ec9bf94951ce79b5f8; __utmb=222163677.19.9.1652284953979; __atuvc=30%7C19; __atuvs=627bdb98682bc242006'
}
})
.then(r => r.data)
.catch(console.error)
const $ = cheerio.load(html)
const rows = $('table.gr_row').toArray()
items = items.concat(rows)
}
console.log(`Found ${items.length} channels`)
return items.map(item => {
const $item = cheerio.load(item)
const name = $item('.gr_row_head > div > a.gr_row_head_logo.link_to_station > img').attr(
'alt'
)
const url = $item('.gr_row_head > div > div.gr_row_head_poste > a').attr('href')
const [_, site_id] = url.match(/horaire-tele\/(.*)/) || [null, null]
return {
lang: 'fr',
site_id,
name
}
})
}
}
/**
 * Reads the programme title from a schedule row.
 *
 * @param {Function} $item - Cheerio query function scoped to one row.
 * @returns {string} Trimmed text of the first `.titre` element.
 */
function parseTitle($item) {
  const titleNode = $item('.titre').first()
  const rawTitle = titleNode.text()
  return rawTitle.trim()
}
/**
 * Combines the requested day with the row's `.heure` text and parses the
 * result as a wall-clock time in the America/Toronto zone.
 *
 * @param {Function} $item - Cheerio query function scoped to one row.
 * @param {object} date - Day the schedule was requested for (dayjs-style).
 * @returns {object} dayjs instance for the programme's start.
 */
function parseStart($item, date) {
  const clock = $item('.heure').text()
  const day = date.format('YYYY-MM-DD')
  const stamp = `${day} ${clock}`
  return dayjs.tz(stamp, 'YYYY-MM-DD HH:mm', 'America/Toronto')
}
/**
 * Extracts the schedule rows from a schedule page.
 *
 * @param {string} content - HTML of the schedule page.
 * @returns {Array} Matched `tr` nodes whose class starts with
 *   `liste_row_style_`; empty when there is no content to parse.
 */
function parseItems(content) {
  // A failed or empty response has nothing to parse; return early instead of
  // passing a non-string to cheerio.load().
  if (!content) return []

  const $ = cheerio.load(content)
  return $(
    '#main_container > div.liste_container > table > tbody > tr[class^=liste_row_style_]'
  ).toArray()
}