Video library and YouTube#
This page shows stats about the video library and the OLS YouTube channel.
import matplotlib.pyplot as plt
import pandas as pd
import yt_dlp
Video library#
The video library contains available videos from talks in Open Seeds cohort calls.
# Base location of the Open Seeds data artifacts (raw files in the OLS website repository)
baseurl = "https://raw.githubusercontent.com/open-life-science/open-life-science.github.io/main/_data/artifacts/openseeds/"
# The first CSV column is used as the index; na_filter=False turns off
# missing-value detection when parsing.
library_df = pd.read_csv(baseurl + "library.csv", index_col=0, na_filter=False)
Number of talks
# One row per talk, so the row count is the number of talks
library_df.shape[0]
236
Speakers#
# Number of talks per speaker: one row per distinct value of the "speakers"
# column, with the talk count in a single "Total" column.
# size() counts rows per group, so the result does not depend on which other
# columns library.csv contains or on whether any of them has missing values
# (the previous count()/drop()/rename() chain relied on both).
# NOTE(review): talks are grouped by the raw "speakers" string; presumably a
# talk with several speakers is counted under the combined string -- confirm.
speaker_df = (
    library_df
    .groupby(by="speakers")
    .size()
    .to_frame(name="Total")
)
Number of speakers
# One row per distinct "speakers" value
speaker_df.shape[0]
124
Mean number of talks per speaker
# Average of the per-speaker talk counts
speaker_df.Total.mean()
np.float64(1.903225806451613)
Median number of talks per speaker
# Median of the per-speaker talk counts
speaker_df.Total.median()
np.float64(1.0)
Distribution of the number of talks per speaker
# Histogram of the number of talks per speaker, drawn on an explicit Axes
fig, ax = plt.subplots()
fig.set_dpi(300)
speaker_df.plot(kind="hist", ax=ax, bins=25, color="#139D3D", legend=False)
# Label the x axis through the Axes object rather than the pyplot state machine
ax.set_xlabel('Number of talks')
Text(0.5, 0, 'Number of talks')
YouTube stats#
All videos from Open Seeds calls are uploaded on the OLS YouTube channel
%%capture
ydl_opts = {}
URL = "https://www.youtube.com/c/OpenLifeSci"
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(URL, download=False)
# ydl.sanitize_info makes the info json-serializable
channel_content = ydl.sanitize_info(info)
# extract video information
videos = []
for v in channel_content['entries'][0]['entries']:
videos.append({key:v[key] for key in ['title', 'duration', 'view_count']})
yt_stat_df = (
pd.DataFrame(videos)
.assign(Duration=lambda df: df.duration/60)
.drop(columns=["duration"])
.rename(columns=str.capitalize)
)
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In[10], line 4
2 URL = "https://www.youtube.com/c/OpenLifeSci"
3 with yt_dlp.YoutubeDL(ydl_opts) as ydl:
----> 4 info = ydl.extract_info(URL, download=False)
5 # ydl.sanitize_info makes the info json-serializable
6 channel_content = ydl.sanitize_info(info)
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1611, in YoutubeDL.extract_info(self, url, download, ie_key, extra_info, process, force_generic_extractor)
1609 raise ExistingVideoReached
1610 break
-> 1611 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1612 else:
1613 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1622, in YoutubeDL._handle_extraction_exceptions.<locals>.wrapper(self, *args, **kwargs)
1620 while True:
1621 try:
-> 1622 return func(self, *args, **kwargs)
1623 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1624 raise
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1778, in YoutubeDL.__extract_info(self, url, ie, download, extra_info, process)
1776 if process:
1777 self._wait_for_video(ie_result)
-> 1778 return self.process_ie_result(ie_result, download, extra_info)
1779 else:
1780 return ie_result
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1907, in YoutubeDL.process_ie_result(self, ie_result, download, extra_info)
1905 self._sanitize_thumbnails(ie_result)
1906 try:
-> 1907 return self.__process_playlist(ie_result, download)
1908 finally:
1909 self._playlist_level -= 1
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:2054, in YoutubeDL.__process_playlist(self, ie_result, download)
2048 continue
2050 self.to_screen(
2051 f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
2052 f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')
-> 2054 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
2055 'playlist_index': playlist_index,
2056 'playlist_autonumber': i + 1,
2057 }, extra))
2058 if not entry_result:
2059 failures += 1
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1622, in YoutubeDL._handle_extraction_exceptions.<locals>.wrapper(self, *args, **kwargs)
1620 while True:
1621 try:
-> 1622 return func(self, *args, **kwargs)
1623 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1624 raise
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:2086, in YoutubeDL.__process_iterable_entry(self, entry, download, extra_info)
2084 @_handle_extraction_exceptions
2085 def __process_iterable_entry(self, entry, download, extra_info):
-> 2086 return self.process_ie_result(
2087 entry, download=download, extra_info=extra_info)
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1907, in YoutubeDL.process_ie_result(self, ie_result, download, extra_info)
1905 self._sanitize_thumbnails(ie_result)
1906 try:
-> 1907 return self.__process_playlist(ie_result, download)
1908 finally:
1909 self._playlist_level -= 1
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:2054, in YoutubeDL.__process_playlist(self, ie_result, download)
2048 continue
2050 self.to_screen(
2051 f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
2052 f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')
-> 2054 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
2055 'playlist_index': playlist_index,
2056 'playlist_autonumber': i + 1,
2057 }, extra))
2058 if not entry_result:
2059 failures += 1
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1622, in YoutubeDL._handle_extraction_exceptions.<locals>.wrapper(self, *args, **kwargs)
1620 while True:
1621 try:
-> 1622 return func(self, *args, **kwargs)
1623 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1624 raise
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:2086, in YoutubeDL.__process_iterable_entry(self, entry, download, extra_info)
2084 @_handle_extraction_exceptions
2085 def __process_iterable_entry(self, entry, download, extra_info):
-> 2086 return self.process_ie_result(
2087 entry, download=download, extra_info=extra_info)
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1857, in YoutubeDL.process_ie_result(self, ie_result, download, extra_info)
1853 return ie_result
1854 elif result_type == 'url':
1855 # We have to add extra_info to the results because it may be
1856 # contained in a playlist
-> 1857 return self.extract_info(
1858 ie_result['url'], download,
1859 ie_key=ie_result.get('ie_key'),
1860 extra_info=extra_info)
1861 elif result_type == 'url_transparent':
1862 # Use the information from the embedding page
1863 info = self.extract_info(
1864 ie_result['url'], ie_key=ie_result.get('ie_key'),
1865 extra_info=extra_info, download=False, process=False)
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1611, in YoutubeDL.extract_info(self, url, download, ie_key, extra_info, process, force_generic_extractor)
1609 raise ExistingVideoReached
1610 break
-> 1611 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1612 else:
1613 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1622, in YoutubeDL._handle_extraction_exceptions.<locals>.wrapper(self, *args, **kwargs)
1620 while True:
1621 try:
-> 1622 return func(self, *args, **kwargs)
1623 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1624 raise
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1757, in YoutubeDL.__extract_info(self, url, ie, download, extra_info, process)
1754 self._apply_header_cookies(url)
1756 try:
-> 1757 ie_result = ie.extract(url)
1758 except UserNotLive as e:
1759 if process:
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/extractor/common.py:740, in InfoExtractor.extract(self, url)
737 self.initialize()
738 self.to_screen('Extracting URL: %s' % (
739 url if self.get_param('verbose') else truncate_string(url, 100, 20)))
--> 740 ie_result = self._real_extract(url)
741 if ie_result is None:
742 return None
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/extractor/youtube.py:4152, in YoutubeIE._real_extract(self, url)
4149 base_url = self.http_scheme() + '//www.youtube.com/'
4150 webpage_url = base_url + 'watch?v=' + video_id
-> 4152 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
4154 playability_statuses = traverse_obj(
4155 player_responses, (..., 'playabilityStatus'), expected_type=dict)
4157 trailer_video_id = get_first(
4158 playability_statuses,
4159 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
4160 expected_type=str)
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/extractor/youtube.py:4111, in YoutubeIE._download_player_responses(self, url, smuggled_data, video_id, webpage_url)
4109 if pp:
4110 query['pp'] = pp
-> 4111 webpage = self._download_webpage(
4112 webpage_url, video_id, fatal=False, query=query)
4114 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
4116 player_responses, player_url = self._extract_player_responses(
4117 self._get_requested_clients(url, smuggled_data),
4118 video_id, webpage, master_ytcfg, smuggled_data)
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/extractor/common.py:1188, in InfoExtractor._download_webpage(self, url_or_request, video_id, note, errnote, fatal, tries, timeout, *args, **kwargs)
1186 while True:
1187 try:
-> 1188 return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
1189 except IncompleteRead as e:
1190 try_count += 1
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/extractor/common.py:1139, in InfoExtractor.__create_download_methods.<locals>.download_content(self, url_or_request, video_id, note, errnote, transform_source, fatal, encoding, data, headers, query, expected_status, impersonate, require_impersonation)
1137 kwargs.pop('transform_source')
1138 # The method is fetched by name so subclasses can override _download_..._handle
-> 1139 res = getattr(self, download_handle.__name__)(url_or_request, video_id, **kwargs)
1140 return res if res is False else res[0]
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/extractor/common.py:966, in InfoExtractor._download_webpage_handle(self, url_or_request, video_id, note, errnote, fatal, encoding, data, headers, query, expected_status, impersonate, require_impersonation)
964 assert not fatal
965 return False
--> 966 content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
967 encoding=encoding, data=data)
968 return (content, urlh)
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/extractor/common.py:1042, in InfoExtractor._webpage_read_content(self, urlh, url_or_request, video_id, note, errnote, fatal, prefix, encoding, data)
1040 def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
1041 prefix=None, encoding=None, data=None):
-> 1042 webpage_bytes = urlh.read()
1043 if prefix is not None:
1044 webpage_bytes = prefix + webpage_bytes
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/yt_dlp/networking/_requests.py:143, in RequestsResponseAdapter.read(self, amt)
140 def read(self, amt: int | None = None):
141 try:
142 # Interact with urllib3 response directly.
--> 143 return self.fp.read(amt, decode_content=True)
145 # See urllib3.response.HTTPResponse.read() for exceptions raised on read
146 except urllib3.exceptions.SSLError as e:
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/urllib3/response.py:949, in HTTPResponse.read(self, amt, decode_content, cache_content)
946 if len(self._decoded_buffer) >= amt:
947 return self._decoded_buffer.get(amt)
--> 949 data = self._raw_read(amt)
951 flush_decoder = amt is None or (amt != 0 and not data)
953 if not data and len(self._decoded_buffer) == 0:
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/urllib3/response.py:873, in HTTPResponse._raw_read(self, amt, read1)
870 fp_closed = getattr(self._fp, "closed", False)
872 with self._error_catcher():
--> 873 data = self._fp_read(amt, read1=read1) if not fp_closed else b""
874 if amt is not None and amt != 0 and not data:
875 # Platform-specific: Buggy versions of Python.
876 # Close the connection when no data is returned
(...)
881 # not properly close the connection in all cases. There is
882 # no harm in redundantly calling close.
883 self._fp.close()
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/site-packages/urllib3/response.py:856, in HTTPResponse._fp_read(self, amt, read1)
853 return self._fp.read1(amt) if amt is not None else self._fp.read1()
854 else:
855 # StringIO doesn't like amt=None
--> 856 return self._fp.read(amt) if amt is not None else self._fp.read()
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/http/client.py:473, in HTTPResponse.read(self, amt)
470 return b""
472 if self.chunked:
--> 473 return self._read_chunked(amt)
475 if amt is not None:
476 if self.length is not None and amt > self.length:
477 # clip the read to the "end of response"
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/http/client.py:595, in HTTPResponse._read_chunked(self, amt)
593 value = []
594 try:
--> 595 while (chunk_left := self._get_chunk_left()) is not None:
596 if amt is not None and amt <= chunk_left:
597 value.append(self._safe_read(amt))
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/http/client.py:579, in HTTPResponse._get_chunk_left(self)
577 self._safe_read(2) # toss the CRLF at the end of the chunk
578 try:
--> 579 chunk_left = self._read_next_chunk_size()
580 except ValueError:
581 raise IncompleteRead(b'')
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/http/client.py:539, in HTTPResponse._read_next_chunk_size(self)
537 def _read_next_chunk_size(self):
538 # Read the next chunk size from the file
--> 539 line = self.fp.readline(_MAXLINE + 1)
540 if len(line) > _MAXLINE:
541 raise LineTooLong("chunk size")
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/socket.py:708, in SocketIO.readinto(self, b)
706 while True:
707 try:
--> 708 return self._sock.recv_into(b)
709 except timeout:
710 self._timeout_occurred = True
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/ssl.py:1252, in SSLSocket.recv_into(self, buffer, nbytes, flags)
1248 if flags != 0:
1249 raise ValueError(
1250 "non-zero flags not allowed in calls to recv_into() on %s" %
1251 self.__class__)
-> 1252 return self.read(nbytes, buffer)
1253 else:
1254 return super().recv_into(buffer, nbytes, flags)
File /usr/share/miniconda/envs/ols-stats/lib/python3.12/ssl.py:1104, in SSLSocket.read(self, len, buffer)
1102 try:
1103 if buffer is not None:
-> 1104 return self._sslobj.read(len, buffer)
1105 else:
1106 return self._sslobj.read(len)
KeyboardInterrupt:
Number of videos on the YouTube channel
# One row per extracted video
yt_stat_df.shape[0]
158
Number of videos of the Open Seeds calls
# Keep the videos whose title contains "OLS-".
# NOTE(review): this is a plain substring match on the title, so videos that
# merely mention a cohort (e.g. "Creating an issue on OLS-4 repository") are
# kept as well -- confirm that is intended.
openseeds_yt_df = yt_stat_df[yt_stat_df["Title"].str.contains('OLS-')]
len(openseeds_yt_df)
74
Duration#
Number of hours of Open Seeds videos on the YouTube channel
# Total duration in hours (Duration holds minutes, hence the division by 60).
# Series.sum() skips missing durations, consistent with the .mean() used for
# the mean duration; the builtin sum() would return NaN if any value is missing.
openseeds_yt_df["Duration"].sum() / 60
78.0469444444444
Mean duration (minutes) of Open Seeds videos
# Average length of an Open Seeds video, in minutes
openseeds_yt_df["Duration"].mean()
63.28130630630629
Longest and shortest Open Seeds videos
# Longest videos first; the truncated display shows both ends of the ranking
openseeds_yt_df.sort_values(by="Duration", ascending=False)
| | Title | View_count | Duration |
---|---|---|---|
65 | OLS-4 Week 06 - Open Science I: Project Develo... | 67 | 105.883333 |
125 | OLS-3 graduation session 3 | 81 | 103.933333 |
4 | OLS-8: Open Leadership in Practice | 45 | 93.233333 |
2 | OLS-8 - Week 10: Open Science Garden II | 42 | 92.083333 |
52 | OLS-5 Week 06:Open science I: Project Developm... | 92 | 86.066667 |
... | ... | ... | ... |
31 | OLS-6: Week 13 - Personal Ecology & Social hour | 33 | 24.183333 |
60 | OLS-4 Week 13 - Self Care and Personal Ecology | 38 | 14.866667 |
47 | OLS-5 Week 13 - Self-care & Social call [Skill... | 42 | 12.816667 |
69 | Creating an issue on OLS-4 repository | 79 | 2.766667 |
34 | Call opens for application to the OLS-7 traini... | 30 | 0.450000 |
74 rows × 3 columns
Views#
Total number of views of the Open Seeds videos on the YouTube channel
# Total views across the Open Seeds videos.
# Series.sum() skips missing view counts, consistent with the .mean() used for
# the mean number of views; the builtin sum() would not.
openseeds_yt_df["View_count"].sum()
6684
Mean number of views per Open Seeds video
# Average view count of an Open Seeds video
openseeds_yt_df["View_count"].mean()
90.32432432432432
Most and least viewed Open Seeds videos
# Most viewed videos first; the truncated display shows both ends of the ranking
openseeds_yt_df.sort_values(by="View_count", ascending=False)
| | Title | View_count | Duration |
---|---|---|---|
136 | OLS-3 Application Launch Webinar and Q&A - J... | 834 | 38.100000 |
32 | OLS-7 cohort launch application webinar | 382 | 34.650000 |
135 | OLS-3 Week 2 Cohort call 1: Welcome to Open Li... | 229 | 63.850000 |
140 | [OLS-2] Cohort call 6 - week 10 - Open Science... | 186 | 69.916667 |
154 | OLS-1 - Week 2 - Cohort Call 1 - Welcome to Op... | 171 | 56.616667 |
... | ... | ... | ... |
34 | Call opens for application to the OLS-7 traini... | 30 | 0.450000 |
64 | OLS-4 Week 08 - Community design for inclusivity | 28 | 68.900000 |
48 | OLS-5 Week 12 - Designing & Empowering for inc... | 24 | 59.416667 |
61 | OLS-4 Week 12 - Diversity and Inclusion & Ally... | 22 | 54.933333 |
10 | Open Seeds OLS-7 Graduation - Group 1 (Multist... | 18 | 84.916667 |
74 rows × 3 columns