File indexing completed on 2024-04-28 09:19:47

0001 import bs4
0002 import re
0003 import json
0004 import datetime
0005 from .session.session import session
0006 
0007 class YoutubeSearcher:
0008     def __init__(self, location_code=None, user_agent=None):
0009         if location_code:
0010             self.location_code = location_code
0011         else:
0012             self.location_code = "US"
0013         
0014         # TODO make compatibile with mobile user_agents
0015         if user_agent:
0016             self.user_agent = user_agent
0017         else:
0018             self.user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36"
0019         
0020         self.base_url = "https://www.youtube.com"
0021         self.headers = {
0022             'User-Agent': self.user_agent
0023         }
0024         self.featured_channel = {"videos": [], "playlists": []}
0025         self.data = {}
0026         self.videos = []
0027         self.playlists = []
0028         self.related_to_search = []
0029         self.related_queries = []
0030         self.radio = []
0031         self.movies = []
0032         self.promoted = []
0033         self.videos_on_page = []
0034         self.corrected_query = None
0035         self.contents = None
0036         self.primary_contents = None
0037         self.secondary_contents = None
0038         self.primary_contents_page = None
0039     
0040     def search_youtube(self, query, render="all"):
0041         self.featured_channel = {"videos": [], "playlists": []}
0042         self.data = {}
0043         self.videos = []
0044         self.playlists = []
0045         self.related_to_search = []
0046         self.related_queries = []
0047         self.radio = []
0048         self.movies = []
0049         self.promoted = []
0050         self.videos_on_page = []
0051         self.corrected_query = None
0052         self.contents = None
0053         self.primary_contents = None
0054         self.secondary_contents = None
0055         self.primary_contents_page = None
0056 
0057         params = {"search_query": query,
0058                   "gl": self.location_code}
0059         
0060         # TODO dont cache if no results found
0061         html = session.get(self.base_url + "/results", cookies={'CONSENT': 'YES+42'},
0062                            headers=self.headers, params=params).text
0063         soup = bs4.BeautifulSoup(html, 'html.parser')
0064         results = self.santize_soup_result(soup)
0065         data = {"query": query, "corrected_query": query}
0066         
0067         contents = results['contents']['twoColumnSearchResultsRenderer']
0068 
0069         content_checker = contents["primaryContents"]["sectionListRenderer"]["contents"][0]['itemSectionRenderer']['contents']
0070         if "shelfRenderer" in content_checker:
0071             self.primary_contents = contents["primaryContents"]["sectionListRenderer"]["contents"][0]['itemSectionRenderer']['contents'][0]['shelfRenderer']['content']['verticalListRenderer']['items']
0072         else:
0073             self.primary_contents = contents["primaryContents"]["sectionListRenderer"]["contents"][0]['itemSectionRenderer']['contents']
0074 
0075         self.contents = contents
0076 
0077         if render == "all":
0078             self.prepare_feature_channel_info()
0079             self.prepare_videos_info()
0080             self.prepare_playlistRender_info()
0081             self.prepare_horizontalCardList_info()
0082             self.prepare_radioRenderer_info()
0083             self.prepare_movieRenderer_info()
0084             self.prepare_carouselAdRenderer_info()
0085             self.prepare_autoCorrectedQuery_info()
0086             self.prepare_searchPyRenderer_info()
0087             self.filter_for_secondaryContents()
0088             
0089             self.data["videos"] = self.videos
0090             self.data["playlists"] = self.playlists
0091             self.data["featured_channel"] = self.featured_channel
0092             self.data["related_videos"] = self.related_to_search
0093             self.data["related_queries"] = self.related_queries
0094             self.data["full_movies"] = self.movies
0095             self.data["promoted"] = self.promoted
0096             
0097         if render == "featured": 
0098             self.prepare_feature_channel_info()
0099             self.prepare_videos_info()
0100             self.filter_for_secondaryContents()
0101             self.data["featured_channel"] = self.featured_channel
0102             
0103         if render == "videos":
0104             self.prepare_videos_info()
0105             self.data["videos"] = self.videos
0106             
0107         if render == "related":
0108             self.prepare_videos_info()
0109             self.prepare_horizontalCardList_info()
0110             self.data["related_videos"] = self.related_to_search
0111             self.data["related_queries"] = self.related_queries
0112         
0113         return self.data
0114     
0115     def page_search(self, page_type="trending"):
0116         params = {"gl": self.location_code}
0117         
0118         # TODO dont cache if no results found
0119         if page_type == "news":
0120             page = "news"
0121         elif page_type == "music":
0122             page = "music"
0123         elif page_type ==  "entertainment":
0124             page = "entertainment"
0125         else:
0126             page = "feed/trending"
0127         
0128         html = session.get(self.base_url + "/" + page, cookies={'CONSENT': 'YES+42'},
0129                            headers=self.headers, params=params).text
0130         soup = bs4.BeautifulSoup(html, 'html.parser')
0131         #print(soup)
0132         results = self.santize_soup_result(soup)
0133         
0134         contents = results['contents']['twoColumnBrowseResultsRenderer']
0135         self.primary_contents_page = contents['tabs'][0]['tabRenderer']['content'][
0136            'sectionListRenderer']['contents']
0137     
0138         if page == "feed/trending":
0139             self.prepare_pageTrending_info()
0140         else:
0141             self.prepare_pageRequested_info()
0142         
0143         self.data["page_videos"] = self.videos_on_page
0144         
0145         return self.data
0146     
0147     def watchlist_search(self, video_id=None):
0148         related_vids_on_page = []
0149         params = {"gl": self.location_code}
0150         base_url = "https://www.youtube.com/watch?v="
0151         html = session.get(base_url + video_id, cookies={'CONSENT': 'YES+42'},
0152                            headers=self.headers, params=params).text
0153         soup = bs4.BeautifulSoup(html, 'html.parser')
0154         results = self.santize_soup_result(soup)
0155         contents = results['contents']['twoColumnWatchNextResults']['secondaryResults']['secondaryResults']['results']
0156         for x in range(len(contents)):
0157             if "compactVideoRenderer" in contents[x]:
0158                 vid = contents[x]["compactVideoRenderer"]
0159                 thumb = vid["thumbnail"]['thumbnails']
0160                 
0161                 #Get video view count or live watch count
0162                 if "simpleText" in vid["shortViewCountText"]:
0163                     views = vid["shortViewCountText"]["simpleText"]
0164                 else:
0165                     views = vid["shortViewCountText"]["runs"][0]["text"] + " " +  vid["shortViewCountText"]["runs"][1]["text"]
0166                             
0167                 #Get video published_time assume if not available video is Live
0168                 if "publishedTimeText" in vid:
0169                     published_time = vid["publishedTimeText"]["simpleText"]
0170                 else:
0171                     published_time = "Live"
0172                 
0173                 title = vid["title"]["simpleText"]
0174                 
0175                 if 'descriptionSnippet' in vid:
0176                     desc = " ".join([
0177                         r["text"] for r in vid['descriptionSnippet']["runs"]])
0178                 else:  # ocasionally happens
0179                     desc = title
0180                 
0181                 #Length filter for live video
0182                 if "lengthText" in vid:
0183                     length_caption = \
0184                         vid["lengthText"]['accessibility']["accessibilityData"][
0185                             "label"]
0186                     length_txt = vid["lengthText"]['simpleText']
0187                 else:
0188                     length_caption = "Live"
0189                     length_txt = "Live"
0190                         
0191                 if "longBylineText" in vid:
0192                     owner_txt = vid["longBylineText"]["runs"][0]["text"]
0193                         
0194                 videoId = vid['videoId']
0195                 url = \
0196                     vid['navigationEndpoint']['commandMetadata'][
0197                         'webCommandMetadata']['url']
0198                 
0199                 related_vids_on_page.append(
0200                     {
0201                         "url": base_url + vid['videoId'],
0202                         "title": title,
0203                         "length": length_txt,
0204                         "length_human": length_caption,
0205                         "views": views,
0206                         "published_time": published_time,
0207                         "videoId": videoId,
0208                         "thumbnails": thumb,
0209                         "description": desc,
0210                         "channel_name": owner_txt
0211                     }
0212                 )
0213                     
0214         
0215         self.data["watchlist_videos"] = related_vids_on_page
0216         return self.data
0217                         
0218     def santize_soup_result(self, soup_blob):
0219         # Make sure we always get the correct blob and santize it
0220         blob = soup_blob.find('script', text=re.compile("ytInitialData"))
0221         #print(blob)
0222         json_data = str(blob)[str(blob).find('{\"responseContext\"'):str(blob).find('module={}')]
0223         json_data = re.split(r"\};", json_data)[0]
0224         #print(json_data)
0225         results = json.loads(json_data+"}")
0226         return results
0227 
0228     def prepare_feature_channel_info(self):
0229         # because order is not assured we need to make 2 passes over the data
0230         for vid in self.primary_contents:
0231             if 'channelRenderer' in vid:
0232                 vid = vid['channelRenderer']
0233                 user = \
0234                     vid['navigationEndpoint']['commandMetadata']['webCommandMetadata'][
0235                 'url']
0236                 
0237                 self.featured_channel["title"] = vid["title"]["simpleText"]
0238                 
0239                 if 'descriptionSnippet' in vid:
0240                     d = [r["text"] for r in vid['descriptionSnippet']["runs"]]
0241                 else:
0242                     d = vid["title"]["simpleText"].split(" ")
0243                 
0244                 self.featured_channel["description"] = " ".join(d)
0245                 self.featured_channel["user_url"] = self.base_url + user
0246     
0247     def prepare_videos_info(self):
0248         for vid in self.primary_contents:
0249             if 'videoRenderer' in vid:
0250                 vid = vid['videoRenderer']
0251                 thumb = vid["thumbnail"]['thumbnails']
0252                 
0253                 if "shortViewCountText" in vid:
0254                 #Get video view count or live watch count
0255                     if "simpleText" in vid["shortViewCountText"]:
0256                         views = vid["shortViewCountText"]["simpleText"]
0257                     else:
0258                         views = vid["shortViewCountText"]["runs"][0]["text"] + " " +  vid["shortViewCountText"]["runs"][1]["text"]
0259                 else:
0260                     views = " "
0261                 
0262                 #Get video published_time assume if not available video is Live
0263                 if "publishedTimeText" in vid:
0264                     published_time = vid["publishedTimeText"]["simpleText"]
0265                 else:
0266                     published_time = "Live"
0267                     
0268                 title = " ".join([r["text"] for r in vid['title']["runs"]])
0269                 if 'descriptionSnippet' in vid:
0270                     desc = " ".join([
0271                         r["text"] for r in vid['descriptionSnippet']["runs"]])
0272                 else:  # ocasionally happens
0273                     desc = title
0274                     
0275                 #Length filter for live video
0276                 if "lengthText" in vid:
0277                     length_caption = \
0278                         vid["lengthText"]['accessibility']["accessibilityData"][
0279                             "label"]
0280                     length_txt = vid["lengthText"]['simpleText']
0281                 else:
0282                     length_caption = "Live"
0283                     length_txt = "Live"
0284 
0285                 videoId = vid['videoId']
0286                 url = \
0287                     vid['navigationEndpoint']['commandMetadata'][
0288                         'webCommandMetadata']['url']
0289                 
0290                 if "ownerText" in vid:
0291                     owner_txt = vid["ownerText"]["runs"][0]["text"]
0292                 
0293                 self.videos.append(
0294                     {
0295                         "url": self.base_url + url,
0296                         "title": title,
0297                         "length": length_txt,
0298                         "length_human": length_caption,
0299                         "views": views,
0300                         "published_time": published_time,
0301                         "videoId": videoId,
0302                         "thumbnails": thumb,
0303                         "description": desc,
0304                         "channel_name": owner_txt
0305                     }
0306                 )
0307             elif 'shelfRenderer' in vid:
0308                 entries = vid['shelfRenderer']
0309                 #most recent from channel {title_from_step_above}
0310                 #related to your search
0311                 
0312                 if "simpleText" in entries["title"]:
0313                     category = entries["title"]["simpleText"]
0314                 else:
0315                     category = entries["title"]["runs"][0]["text"]
0316                 
0317                 #TODO category localization
0318                 #this comes in lang from your ip address
0319                 #not good to use as dict keys, can assumptions be made about
0320                 #ordering and num of results? last item always seems to be
0321                 #related artists and first (if any) featured channel
0322                 ch = self.featured_channel.get("title", "")
0323                 
0324                 for vid in entries["content"]["verticalListRenderer"]['items']:
0325                     vid = vid['videoRenderer']
0326                     thumb = vid["thumbnail"]['thumbnails']
0327                     d = [r["text"] for r in vid['title']["runs"]]
0328                     title = " ".join(d)
0329                     
0330                     #Get video view count or live watch count
0331                     if "simpleText" in vid["shortViewCountText"]:
0332                         views = vid["viewCountText"]["simpleText"]
0333                     else:
0334                         views = vid["shortViewCountText"]["runs"][0]["text"] + " " +  vid["shortViewCountText"]["runs"][1]["text"]
0335                         
0336                     if "publishedTimeText" in vid:
0337                         published_time = vid["publishedTimeText"]["simpleText"]
0338                     else:
0339                         published_time = "Live"
0340                     
0341                     #Length filter for live video
0342                     if "lengthText" in vid:
0343                         length_caption = \
0344                             vid["lengthText"]['accessibility']["accessibilityData"][
0345                                 "label"]
0346                         length_txt = vid["lengthText"]['simpleText']
0347                     else:
0348                         length_caption = "Live"
0349                         length_txt = "Live"
0350                     
0351                     if "ownerText" in vid:
0352                         owner_txt = vid["ownerText"]["runs"][0]["text"]
0353 
0354                     videoId = vid['videoId']
0355                     url = vid['navigationEndpoint']['commandMetadata'][
0356                         'webCommandMetadata']['url']
0357                     if ch and category.endswith(ch):
0358                         self.featured_channel["videos"].append(
0359                             {
0360                                 "url": self.base_url + url,
0361                                 "title": title,
0362                                 "length": length_txt,
0363                                 "length_human": length_caption,
0364                                 "views": views,
0365                                 "published_time": published_time,
0366                                 "videoId": videoId,
0367                                 "thumbnails": thumb,
0368                                 "channel_name": owner_txt
0369                             }
0370                         )
0371                     else:
0372                         self.related_to_search.append(
0373                             {
0374                                 "url": self.base_url + url,
0375                                 "title": title,
0376                                 "length": length_txt,
0377                                 "length_human": length_caption,
0378                                 "views": views,
0379                                 "published_time": published_time,
0380                                 "videoId": videoId,
0381                                 "thumbnails": thumb,
0382                                 "reason": category,
0383                                 "channel_name": owner_txt
0384                             }
0385                         )
0386 
0387     def prepare_playlistRender_info(self):
0388         for vid in self.primary_contents:
0389             if 'playlistRenderer' in vid:
0390                 vid = vid['playlistRenderer']
0391                 playlist = {
0392                     "title": vid["title"]["simpleText"]
0393                 }
0394                 vid = vid['navigationEndpoint']
0395                 playlist["url"] = \
0396                     self.base_url + vid['commandMetadata']['webCommandMetadata']['url']
0397                 playlist["videoId"] = vid['watchEndpoint']['videoId']
0398                 playlist["playlistId"] = vid['watchEndpoint']['playlistId']
0399                 self.playlists.append(playlist)
0400 
0401     def prepare_horizontalCardList_info(self):
0402         for vid in self.primary_contents:
0403             if 'horizontalCardListRenderer' in vid:
0404                 for vid in vid['horizontalCardListRenderer']['cards']:
0405                     vid = vid['searchRefinementCardRenderer']
0406                     url = \
0407                         vid['searchEndpoint']['commandMetadata'][
0408                             "webCommandMetadata"]["url"]
0409                     self.related_queries.append({
0410                         "title": vid['searchEndpoint']['searchEndpoint']["query"],
0411                         "url": self.base_url + url,
0412                         "thumbnails": vid["thumbnail"]['thumbnails']
0413                     })
0414     
0415     def prepare_radioRenderer_info(self):
0416         for vid in self.primary_contents:
0417             if 'radioRenderer' in vid:
0418                 vid = vid['radioRenderer']
0419                 title = vid["title"]["simpleText"]
0420                 thumb = vid["thumbnail"]['thumbnails']
0421                 vid = vid['navigationEndpoint']
0422                 url = vid['commandMetadata']['webCommandMetadata']['url']
0423                 videoId = vid['watchEndpoint']['videoId']
0424                 playlistId = vid['watchEndpoint']['playlistId']
0425                 self.radio.append({
0426                     "title": title,
0427                     "thumbnails": thumb,
0428                     "url": self.base_url + url,
0429                     "videoId": videoId,
0430                     "playlistId": playlistId
0431                 })
0432 
0433     def prepare_movieRenderer_info(self):
0434         for vid in self.primary_contents:
0435             if 'movieRenderer' in vid:
0436                 vid = vid['movieRenderer']
0437                 title = " ".join([r["text"] for r in vid['title']["runs"]])
0438                 thumb = vid["thumbnail"]['thumbnails']
0439                 videoId = vid['videoId']
0440                 meta = vid['bottomMetadataItems']
0441                 meta = [m["simpleText"] for m in meta]
0442                 desc = " ".join([r["text"] for r in vid['descriptionSnippet']["runs"]])
0443                 url = vid['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
0444                 
0445                 movies.append({
0446                     "title": title,
0447                     "thumbnails": thumb,
0448                     "url": self.base_url + url,
0449                     "videoId": videoId,
0450                     "metadata": meta,
0451                     "description": desc
0452                 })
0453 
0454     def prepare_carouselAdRenderer_info(self):
0455         for vid in self.primary_contents:
0456             if 'carouselAdRenderer' in vid:
0457                 vid = vid["carouselAdRenderer"]
0458                 # skip ads
0459     
0460     def prepare_autoCorrectedQuery_info(self):
0461         for vid in self.primary_contents:
0462             if 'showingResultsForRenderer' in vid:
0463                 q = vid['showingResultsForRenderer']['correctedQuery']
0464                 self.corrected_query = " ".join([r["text"] for r in q["runs"]])
0465 
0466     def prepare_searchPyRenderer_info(self):
0467         for vid in self.primary_contents:
0468             if 'searchPyvRenderer' in vid:
0469                 for entry in vid['searchPyvRenderer']['ads']:
0470                     entry = entry['promotedVideoRenderer']
0471                     desc = entry["description"]['simpleText']
0472                     title = entry['longBylineText']['runs'][0]["text"]
0473                     url = self.base_url + entry['longBylineText']['runs'][0][
0474                         'navigationEndpoint']['browseEndpoint']['canonicalBaseUrl']
0475                     self.promoted.append({
0476                         "title": title,
0477                         "description": desc,
0478                         "url": url
0479                     })
0480 
0481     def filter_for_secondaryContents(self):
0482         if self.contents.get("secondaryContents"):
0483             self.secondary_contents = \
0484                 self.contents["secondaryContents"]["secondarySearchContainerRenderer"][
0485                     "contents"][0]["universalWatchCardRenderer"]
0486             self.prepare_secondaryContentsRender()
0487         
0488 
0489     def prepare_secondaryContentsRender(self):
0490             for vid in self.secondary_contents["sections"]:
0491                 entries = vid['watchCardSectionSequenceRenderer']
0492                 for entry in entries['lists']:
0493                     if 'verticalWatchCardListRenderer' in entry:
0494                         for vid in entry['verticalWatchCardListRenderer']["items"]:
0495                             vid = vid['watchCardCompactVideoRenderer']
0496                             thumbs = vid['thumbnail']['thumbnails']
0497                             
0498                             d = [r["text"] for r in vid['title']["runs"]]
0499                             title = " ".join(d)
0500                             url = vid['navigationEndpoint']['commandMetadata'][
0501                                 'webCommandMetadata']['url']
0502                             videoId = vid['navigationEndpoint']['watchEndpoint'][
0503                                 'videoId']
0504                             playlistId = \
0505                                 vid['navigationEndpoint']['watchEndpoint']['playlistId']
0506                             length_caption = \
0507                                 vid["lengthText"]['accessibility'][
0508                                     "accessibilityData"]["label"]
0509                             length_txt = vid["lengthText"]['simpleText']
0510 
0511                             #TODO investigate
0512                             #These seem to always be from featured channel
0513                             #playlistId doesnt match any extracted playlist
0514                             self.featured_channel["videos"].append({
0515                                 "url": self.base_url + url,
0516                                 "title": title,
0517                                 "length": length_txt,
0518                                 "length_human": length_caption,
0519                                 "videoId": videoId,
0520                                 "playlistId": playlistId,
0521                                 "thumbnails": thumbs
0522                             })
0523                     elif 'horizontalCardListRenderer' in entry:
0524                         for vid in entry['horizontalCardListRenderer']['cards']:
0525                             vid = vid['searchRefinementCardRenderer']
0526                             playlistId = \
0527                                 vid['searchEndpoint']['watchPlaylistEndpoint'][
0528                                     'playlistId']
0529                             thumbs = vid['thumbnail']['thumbnails']
0530                             url = vid['searchEndpoint']['commandMetadata'][
0531                                 'webCommandMetadata']['url']
0532                             d = [r["text"] for r in vid['query']["runs"]]
0533                             title = " ".join(d)
0534                             self.featured_channel["playlists"].append({
0535                                 "url": self.base_url + url,
0536                                 "title": title,
0537                                 "thumbnails": thumbs,
0538                                 "playlistId": playlistId
0539                             })
0540 
0541     def prepare_pageTrending_info(self):
0542         for items in self.primary_contents_page:
0543             if 'itemSectionRenderer' in items:
0544                 i_items = items['itemSectionRenderer']['contents'][0]['shelfRenderer']['content']
0545                 if 'expandedShelfContentsRenderer' in i_items:
0546                     page_items = items['itemSectionRenderer']['contents'][0]['shelfRenderer']['content']['expandedShelfContentsRenderer']['items']
0547                 else:
0548                     page_items = []
0549 
0550                 for x in range(len(page_items)):
0551                     if 'videoRenderer' in page_items[x]:
0552                         vid = page_items[x]['videoRenderer']
0553                         thumb = vid["thumbnail"]['thumbnails']
0554                         
0555                         #Get video view count or live watch count
0556                         try:
0557                             if "simpleText" in vid["shortViewCountText"]:
0558                                 views = vid["shortViewCountText"]["simpleText"]
0559                             else:
0560                                 views = vid["shortViewCountText"]["runs"][0]["text"] + " " +  vid["shortViewCountText"]["runs"][1]["text"]
0561                         except:
0562                             views = "Live"
0563                             
0564                         #Get video published_time assume if not available video is Live
0565                         try:
0566                             if "publishedTimeText" in vid:
0567                                 published_time = vid["publishedTimeText"]["simpleText"]
0568                             else:
0569                                 published_time = "Live"
0570                         except:
0571                             published_time = "Now Streaming"
0572                         
0573                         title = " ".join([r["text"] for r in vid['title']["runs"]])
0574                         
0575                         if 'descriptionSnippet' in vid:
0576                             desc = " ".join([
0577                                 r["text"] for r in vid['descriptionSnippet']["runs"]])
0578                         else:  # ocasionally happens
0579                             desc = title
0580                         
0581                         #Length filter for live video
0582                         if "lengthText" in vid:
0583                             length_caption = \
0584                                 vid["lengthText"]['accessibility']["accessibilityData"][
0585                                     "label"]
0586                             length_txt = vid["lengthText"]['simpleText']
0587                         else:
0588                             length_caption = "Live"
0589                             length_txt = "Live"
0590                         
0591                         if "ownerText" in vid:
0592                             owner_txt = vid["ownerText"]["runs"][0]["text"]
0593                         
0594                         videoId = vid['videoId']
0595                         url = \
0596                             vid['navigationEndpoint']['commandMetadata'][
0597                                 'webCommandMetadata']['url']
0598                         self.videos_on_page.append(
0599                             {
0600                                 "url": self.base_url + url,
0601                                 "title": title,
0602                                 "length": length_txt,
0603                                 "length_human": length_caption,
0604                                 "views": views,
0605                                 "published_time": published_time,
0606                                 "videoId": videoId,
0607                                 "thumbnails": thumb,
0608                                 "description": desc,
0609                                 "channel_name": owner_txt
0610                             }
0611                         )
0612 
0613     def prepare_pageRequested_info(self):
0614         for items in self.primary_contents_page:
0615             if 'itemSectionRenderer' in items:
0616                 page_items = items['itemSectionRenderer']['contents'][0]['shelfRenderer']['content']['horizontalListRenderer']['items']
0617                 for x in range(len(page_items)):
0618                     if 'gridVideoRenderer' in page_items[x]:
0619                         vid = page_items[x]['gridVideoRenderer']
0620                         thumb = vid["thumbnail"]['thumbnails']
0621                         
0622                         #Get video view count or live watch count
0623                         if "shortViewCountText" in vid:
0624                             if "simpleText" in vid["shortViewCountText"]:
0625                                 views = vid["shortViewCountText"]["simpleText"]
0626                             else:
0627                                 views = vid["shortViewCountText"]["runs"][0]["text"] + " " +  vid["shortViewCountText"]["runs"][1]["text"]
0628                         else:
0629                             views = "unavailable"
0630                             
0631                         #Get video published_time assume if not available video is Live
0632                         if "publishedTimeText" in vid:
0633                             published_time = vid["publishedTimeText"]["simpleText"]
0634                         else:
0635                             published_time = "Live"
0636                         
0637                         #title = " ".join([r["text"] for r in vid['title']["runs"]])
0638                         title = vid['title']['simpleText']
0639                         
0640                         if 'descriptionSnippet' in vid:
0641                             desc = " ".join([
0642                                 r["text"] for r in vid['descriptionSnippet']["runs"]])
0643                         else:  # ocasionally happens
0644                             desc = title
0645                         
0646                         #Length filter for live video
0647                         overlayInformation = vid['thumbnailOverlays'][0]
0648                         if "thumbnailOverlayTimeStatusRenderer" in overlayInformation:
0649                             length_caption = \
0650                                 overlayInformation['thumbnailOverlayTimeStatusRenderer']['text']['accessibility']["accessibilityData"][
0651                                     "label"]
0652                             length_txt = overlayInformation['thumbnailOverlayTimeStatusRenderer']['text']['simpleText']
0653                         else:
0654                             length_caption = "Live"
0655                             length_txt = "Live"
0656                         
0657                         videoId = vid['videoId']
0658                         url = \
0659                             vid['navigationEndpoint']['commandMetadata'][
0660                                 'webCommandMetadata']['url']
0661                         self.videos_on_page.append(
0662                             {
0663                                 "url": self.base_url + url,
0664                                 "title": title,
0665                                 "length": length_txt,
0666                                 "length_human": length_caption,
0667                                 "views": views,
0668                                 "published_time": published_time,
0669                                 "videoId": videoId,
0670                                 "thumbnails": thumb,
0671                                 "description": desc
0672                             }
0673                         )
0674 
0675     def extract_video_meta(self, url):
0676         params = {"gl": "US"}
0677         html = session.get(url, cookies={'CONSENT': 'YES+42'},
0678                            headers=self.headers, params=params).text
0679         soup = bs4.BeautifulSoup(html, 'html.parser')
0680         results = self.santize_soup_result(soup)
0681         contents = results['contents']['twoColumnWatchNextResults']['results']['results']['contents'][0]['videoPrimaryInfoRenderer']
0682         secondaryContents = results['contents']['twoColumnWatchNextResults']['results']['results']['contents'][1]['videoSecondaryInfoRenderer']
0683         title = contents['title']['runs'][0]['text']
0684         try:
0685             viewCount = contents['viewCount']['videoViewCountRenderer']['viewCount']['simpleText']
0686         except:
0687             viewCount = "Live"
0688         author = secondaryContents['owner']['videoOwnerRenderer']['title']['runs'][0]['text']
0689         try:
0690             actualDate = contents['dateText']['simpleText'] + "  12:00AM"
0691             publishedDate = datetime.datetime.strptime(actualDate, '%d %b %Y %I:%M%p')
0692         except:
0693             publishedDate = "Live"
0694         
0695         vidmetadata = {
0696             "title": title,
0697             "views": viewCount,
0698             "published_time": publishedDate,
0699             "channel_name": author
0700         }
0701         return vidmetadata