Hello I am grabbing data from a web site to use it with a receiver for epg guide everything is okay I am only have a problem with shows starting from 00:00 they don't take next day date.
here is the output :
Series +2 Channel
current show : سبع أرواح (7 أرواح) start at : Thursday 2 April 11:30 ends at : Thursday 2 April 12:30
.... .
.....
current show : الخان start at : Thursday 2 April 23:30 ends at : Friday 3 April 00:30
.....
.......
Dubai One Channel
current show : San Andreas start at : Thursday 2 April 10:00 ends at : Thursday 2 April 12:00
.......
..............
.................
current show : أخبار الإمارات start at : Thursday 2 April 23:30 ends at : Friday 2 April 00:00
.......
...............
current show : ﺑﺮﻧﺎﻣﺞ Dubai Eye On Dubai One start at : Friday 3 April 04:00 ends at : Friday 3 April 08:00
.................
.......................
current show : The Internship start at : Friday 3 April 21:00 ends at : Friday 3 April 23:00
current show : Morocco: Love in Times of War start at : Friday 3 April 23:00 ends at : Friday 3 April 00:00
current show : Empire start at : Friday 3 April 00:00 ends at : Saturday 4 April 01:00
Alhayat TV Channel
..............
......................
............................
current show : ﺑﺮﻧﺎﻣﺞ start at : Thursday 2 April 21:00 ends at : Thursday 2 April 22:30
current show : عين start at : Thursday 2 April 22:30 ends at : Thursday 2 April 23:00
current show : برنامج start at : Thursday 2 April 23:00 ends at : Friday 3 April 00:00
current show : برنامج start at : Friday 3 April 00:00 ends at : Friday 3 April 00:30
current show : الحياة اليوم start at : Friday 3 April 00:30 ends at : Friday 3 April 02:00
............................
as we see it's okay for some channels but not consistant in others
what i expect :
Dubai One Channel
current show : San Andreas start at : Thursday 2 April 10:00 ends at : Thursday 3 April 12:00
.......
..............
.................
current show : أخبار الإمارات start at : Thursday 2 April 23:30 ends at : Friday 3 April 00:00
.......
...............
current show : ﺑﺮﻧﺎﻣﺞ Dubai Eye On Dubai One start at : Friday 3 April 04:00 ends at : Friday 3 April 08:00
.................
.......................
current show : The Internship start at : Friday 3 April 21:00 ends at : Friday 3 April 23:00
current show : Morocco: Love in Times of War start at : Friday 3 April 23:00 ends at : Friday 4 April 00:00
current show : Empire start at : Friday 4 April 00:00 ends at : Saturday 4 April 01:00
What I have tried:
import requests,re
from datetime import datetime
from time import sleep
headers={
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) snap Chromium/80.0.3987.149 Chrome/80.0.3987.149 Safari/537.36'
}
days=[]
times=[]
titles=[]
des=[]
cat=[]
prog=[]
now = datetime.today().year
nb_channel=['1186','1173','1169','1137','1223','1176','1199','1156','1239','1262','1227','1278','1198','1177','1193','1246','1216','1158','1135',
'1170','1159','1226','1168','1292','1203','1101','1134','1283','1188','1260','1290','1204','1269','1280',
'1300','1298','1297','1301','1299','1296','1304','1317','1302','1312','1321','1338','1339','1353','1350','1355']
def elci():
for nb in nb_channel:
try:
url = requests.get('http://elcinema.com/en/tvguide/'+nb+'/',headers=headers)
time_d = re.findall(r'\d{2}:\d{2}\s+\w\w|<div\sclass=\" dates\">\s+(.*)',url.text)
time = re.findall(r'\d{2}:\d{2}\s+\w\w',url.text)
channel_name=re.findall(r'<li>(.*?)<\/li>\s+<li\sclass=\"localization\">',url.text)
print('\n'.join(channel_name))
days[:]=[]
times[:]=[]
titles[:]=[]
des[:]=[]
cat[:]=[]
prog[:]=[]
for ti in time:
times.append(datetime.strptime(ti, '%I:%M %p').strftime('%H:%M'))
for i, val in enumerate(time_d):
if not val:
time_d[i] = time_d[i-1]
days.append(time_d[i])
url_ar = requests.get('http://elcinema.com/ar/tvguide/'+nb+'/',headers=headers)
first=re.findall(r'<li>(.*?)<a\shref=\'#\'\sid=\'read-more\'>',url_ar.text)
last=re.findall(r"<span class='hide'>[^\n]+",url_ar.text)
desc=re.findall(r'<\/a><\/li>\s+<li>\s+\s+(.*\s+.*?)\s+<\/li>\s+<li>',url_ar.text)
descc = [re.sub(' +',' ',d).replace('\n','') for d in desc]
for f,l in zip(first,last):
des.append(f+l.replace("<span class='hide'>",'').replace('</span></li>',''))
for dess in descc:
cat.append(dess)
title_l = re.findall(r'<a\shref=\"\/work\/\d+\/\">(.*?)<\/a><\/li',url_ar.text)
mt2 = re.findall(r'<a\shref=\"\/work\/\d+\/\">(.*?)<\/a><\/li|columns small-7 large-11\">\s+<ul class=\"unstyled no-margin\">\s+<li>(.*?)<\/li>',url_ar.text)
for m in mt2:
if m[0]=='':
titles.append(m[1])
else:
titles.append(m[0])
for index, element in enumerate(titles):
if element not in title_l:
des.insert(index,titles[index])
cat.insert(index,titles[index])
for elem,next_elem,td1,td2,tit in zip(times, times[1:]+[times[0]],time_d,time_d[1:]+[time_d[0]],titles):
print('current show : '+tit+' start at : '+td1+' '+elem+' ends at : '+td2+' '+next_elem+'\n')
except:pass
if __name__=='__main__':
elci()