Skip to content
Open
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@
"legacy": false,
"lf_version": "1.4.3",
"metadata": {
"code_hash": "20398e0d18df",
"code_hash": "8c5296516f6c",
"dependencies": {
"dependencies": [
{
Expand Down Expand Up @@ -595,7 +595,7 @@
"show": true,
"title_case": false,
"type": "code",
"value": "from contextlib import contextmanager\n\nimport pandas as pd\nfrom googleapiclient.discovery import build\nfrom googleapiclient.errors import HttpError\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, IntInput, MessageTextInput, SecretStrInput\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.template.field.base import Output\n\n\nclass YouTubeCommentsComponent(Component):\n \"\"\"A component that retrieves comments from YouTube videos.\"\"\"\n\n display_name: str = \"YouTube Comments\"\n description: str = \"Retrieves and analyzes comments from YouTube videos.\"\n icon: str = \"YouTube\"\n\n # Constants\n COMMENTS_DISABLED_STATUS = 403\n NOT_FOUND_STATUS = 404\n API_MAX_RESULTS = 100\n\n inputs = [\n MessageTextInput(\n name=\"video_url\",\n display_name=\"Video URL\",\n info=\"The URL of the YouTube video to get comments from.\",\n tool_mode=True,\n required=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"YouTube API Key\",\n info=\"Your YouTube Data API key.\",\n required=True,\n ),\n IntInput(\n name=\"max_results\",\n display_name=\"Max Results\",\n value=20,\n info=\"The maximum number of comments to return.\",\n ),\n DropdownInput(\n name=\"sort_by\",\n display_name=\"Sort By\",\n options=[\"time\", \"relevance\"],\n value=\"relevance\",\n info=\"Sort comments by time or relevance.\",\n ),\n BoolInput(\n name=\"include_replies\",\n display_name=\"Include Replies\",\n value=False,\n info=\"Whether to include replies to comments.\",\n advanced=True,\n ),\n BoolInput(\n name=\"include_metrics\",\n display_name=\"Include Metrics\",\n value=True,\n info=\"Include metrics like like count and reply count.\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(name=\"comments\", display_name=\"Comments\", method=\"get_video_comments\"),\n ]\n\n def _extract_video_id(self, video_url: str) -> str:\n \"\"\"Extracts the video ID from a YouTube URL.\"\"\"\n import re\n\n patterns = [\n r\"(?:youtube\\.com\\/watch\\?v=|youtu.be\\/|youtube.com\\/embed\\/)([^&\\n?#]+)\",\n r\"youtube.com\\/shorts\\/([^&\\n?#]+)\",\n ]\n\n for pattern in patterns:\n match = re.search(pattern, video_url)\n if match:\n return match.group(1)\n\n return video_url.strip()\n\n def _process_reply(self, reply: dict, parent_id: str, *, include_metrics: bool = True) -> dict:\n \"\"\"Process a single reply comment.\"\"\"\n reply_snippet = reply[\"snippet\"]\n reply_data = {\n \"comment_id\": reply[\"id\"],\n \"parent_comment_id\": parent_id,\n \"author\": reply_snippet[\"authorDisplayName\"],\n \"text\": reply_snippet[\"textDisplay\"],\n \"published_at\": reply_snippet[\"publishedAt\"],\n \"is_reply\": True,\n }\n if include_metrics:\n reply_data[\"like_count\"] = reply_snippet[\"likeCount\"]\n reply_data[\"reply_count\"] = 0 # Replies can't have replies\n\n return reply_data\n\n def _process_comment(\n self, item: dict, *, include_metrics: bool = True, include_replies: bool = False\n ) -> list[dict]:\n \"\"\"Process a single comment thread.\"\"\"\n comment = item[\"snippet\"][\"topLevelComment\"][\"snippet\"]\n comment_id = item[\"snippet\"][\"topLevelComment\"][\"id\"]\n\n # Basic comment data\n processed_comments = [\n {\n \"comment_id\": comment_id,\n \"parent_comment_id\": \"\", # Empty for top-level comments\n \"author\": comment[\"authorDisplayName\"],\n \"author_channel_url\": comment.get(\"authorChannelUrl\", \"\"),\n \"text\": comment[\"textDisplay\"],\n \"published_at\": comment[\"publishedAt\"],\n \"updated_at\": comment[\"updatedAt\"],\n \"is_reply\": False,\n }\n ]\n\n # Add metrics if requested\n if include_metrics:\n processed_comments[0].update(\n {\n \"like_count\": comment[\"likeCount\"],\n \"reply_count\": item[\"snippet\"][\"totalReplyCount\"],\n }\n )\n\n # Add replies if requested\n if include_replies and item[\"snippet\"][\"totalReplyCount\"] > 0 and \"replies\" in item:\n for reply in item[\"replies\"][\"comments\"]:\n reply_data = self._process_reply(reply, parent_id=comment_id, include_metrics=include_metrics)\n processed_comments.append(reply_data)\n\n return processed_comments\n\n @contextmanager\n def youtube_client(self):\n \"\"\"Context manager for YouTube API client.\"\"\"\n client = build(\"youtube\", \"v3\", developerKey=self.api_key)\n try:\n yield client\n finally:\n client.close()\n\n def get_video_comments(self) -> DataFrame:\n \"\"\"Retrieves comments from a YouTube video and returns as DataFrame.\"\"\"\n try:\n # Extract video ID from URL\n video_id = self._extract_video_id(self.video_url)\n\n # Use context manager for YouTube API client\n with self.youtube_client() as youtube:\n comments_data = []\n results_count = 0\n request = youtube.commentThreads().list(\n part=\"snippet,replies\",\n videoId=video_id,\n maxResults=min(self.API_MAX_RESULTS, self.max_results),\n order=self.sort_by,\n textFormat=\"plainText\",\n )\n\n while request and results_count < self.max_results:\n response = request.execute()\n\n for item in response.get(\"items\", []):\n if results_count >= self.max_results:\n break\n\n comments = self._process_comment(\n item, include_metrics=self.include_metrics, include_replies=self.include_replies\n )\n comments_data.extend(comments)\n results_count += 1\n\n # Get the next page if available and needed\n if \"nextPageToken\" in response and results_count < self.max_results:\n request = youtube.commentThreads().list(\n part=\"snippet,replies\",\n videoId=video_id,\n maxResults=min(self.API_MAX_RESULTS, self.max_results - results_count),\n order=self.sort_by,\n textFormat=\"plainText\",\n pageToken=response[\"nextPageToken\"],\n )\n else:\n request = None\n\n # Convert to DataFrame\n comments_df = pd.DataFrame(comments_data)\n\n # Add video metadata\n comments_df[\"video_id\"] = video_id\n comments_df[\"video_url\"] = self.video_url\n\n # Sort columns for better organization\n column_order = [\n \"video_id\",\n \"video_url\",\n \"comment_id\",\n \"parent_comment_id\",\n \"is_reply\",\n \"author\",\n \"author_channel_url\",\n \"text\",\n \"published_at\",\n \"updated_at\",\n ]\n\n if self.include_metrics:\n column_order.extend([\"like_count\", \"reply_count\"])\n\n comments_df = comments_df[column_order]\n\n return DataFrame(comments_df)\n\n except HttpError as e:\n error_message = f\"YouTube API error: {e!s}\"\n if e.resp.status == self.COMMENTS_DISABLED_STATUS:\n error_message = \"Comments are disabled for this video or API quota exceeded.\"\n elif e.resp.status == self.NOT_FOUND_STATUS:\n error_message = \"Video not found.\"\n\n return DataFrame(pd.DataFrame({\"error\": [error_message]}))\n"
"value": "from contextlib import contextmanager\n\nimport pandas as pd\nfrom googleapiclient.discovery import build\nfrom googleapiclient.errors import HttpError\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, IntInput, MessageTextInput, SecretStrInput\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.template.field.base import Output\n\n\nclass YouTubeCommentsComponent(Component):\n \"\"\"A component that retrieves comments from YouTube videos.\"\"\"\n\n display_name: str = \"YouTube Comments\"\n description: str = \"Retrieves and analyzes comments from YouTube videos.\"\n icon: str = \"YouTube\"\n\n # Constants\n COMMENTS_DISABLED_STATUS = 403\n NOT_FOUND_STATUS = 404\n API_MAX_RESULTS = 100\n\n inputs = [\n MessageTextInput(\n name=\"video_url\",\n display_name=\"Video URL\",\n info=\"The URL of the YouTube video to get comments from.\",\n tool_mode=True,\n required=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"YouTube API Key\",\n info=\"Your YouTube Data API key.\",\n required=True,\n ),\n IntInput(\n name=\"max_results\",\n display_name=\"Max Results\",\n value=20,\n info=\"The maximum number of comments to return.\",\n ),\n DropdownInput(\n name=\"sort_by\",\n display_name=\"Sort By\",\n options=[\"time\", \"relevance\"],\n value=\"relevance\",\n info=\"Sort comments by time or relevance.\",\n ),\n BoolInput(\n name=\"include_replies\",\n display_name=\"Include Replies\",\n value=False,\n info=\"Whether to include replies to comments.\",\n advanced=True,\n ),\n BoolInput(\n name=\"include_metrics\",\n display_name=\"Include Metrics\",\n value=True,\n info=\"Include metrics like like count and reply count.\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(name=\"comments\", display_name=\"Comments\", method=\"get_video_comments\"),\n ]\n\n def _extract_video_id(self, video_url: str) -> str:\n \"\"\"Extracts the video ID from a YouTube URL.\"\"\"\n import re\n\n patterns = [\n r\"(?:youtube\\.com\\/watch\\?v=|youtu.be\\/|youtube.com\\/embed\\/)([^&\\n?#]+)\",\n r\"youtube.com\\/shorts\\/([^&\\n?#]+)\",\n ]\n\n for pattern in patterns:\n match = re.search(pattern, video_url)\n if match:\n return match.group(1)\n\n return video_url.strip()\n\n def _process_reply(self, reply: dict, parent_id: str, *, include_metrics: bool = True) -> dict:\n \"\"\"Process a single reply comment.\"\"\"\n reply_snippet = reply[\"snippet\"]\n reply_data = {\n \"comment_id\": reply[\"id\"],\n \"parent_comment_id\": parent_id,\n \"author\": reply_snippet[\"authorDisplayName\"],\n \"text\": reply_snippet[\"textDisplay\"],\n \"published_at\": reply_snippet[\"publishedAt\"],\n \"is_reply\": True,\n }\n if include_metrics:\n reply_data[\"like_count\"] = reply_snippet[\"likeCount\"]\n reply_data[\"reply_count\"] = 0 # Replies can't have replies\n\n return reply_data\n\n def _process_comment(\n self, item: dict, *, include_metrics: bool = True, include_replies: bool = False\n ) -> list[dict]:\n \"\"\"Process a single comment thread.\"\"\"\n comment = item[\"snippet\"][\"topLevelComment\"][\"snippet\"]\n comment_id = item[\"snippet\"][\"topLevelComment\"][\"id\"]\n\n # Basic comment data\n processed_comments = [\n {\n \"comment_id\": comment_id,\n \"parent_comment_id\": \"\", # Empty for top-level comments\n \"author\": comment[\"authorDisplayName\"],\n \"author_channel_url\": comment.get(\"authorChannelUrl\", \"\"),\n \"text\": comment[\"textDisplay\"],\n \"published_at\": comment[\"publishedAt\"],\n \"updated_at\": comment[\"updatedAt\"],\n \"is_reply\": False,\n }\n ]\n\n # Add metrics if requested\n if include_metrics:\n processed_comments[0].update(\n {\n \"like_count\": comment[\"likeCount\"],\n \"reply_count\": item[\"snippet\"][\"totalReplyCount\"],\n }\n )\n\n # Add replies if requested\n if include_replies and item[\"snippet\"][\"totalReplyCount\"] > 0 and \"replies\" in item:\n for reply in item[\"replies\"][\"comments\"]:\n reply_data = self._process_reply(reply, parent_id=comment_id, include_metrics=include_metrics)\n processed_comments.append(reply_data)\n\n return processed_comments\n\n @contextmanager\n def youtube_client(self):\n \"\"\"Context manager for YouTube API client.\"\"\"\n client = build(\"youtube\", \"v3\", developerKey=self.api_key)\n try:\n yield client\n finally:\n client.close()\n\n def get_video_comments(self) -> DataFrame:\n \"\"\"Retrieves comments from a YouTube video and returns as DataFrame.\"\"\"\n try:\n # Extract video ID from URL\n video_id = self._extract_video_id(self.video_url)\n\n # Use context manager for YouTube API client\n with self.youtube_client() as youtube:\n comments_data = []\n results_count = 0\n request = youtube.commentThreads().list(\n part=\"snippet,replies\",\n videoId=video_id,\n maxResults=min(self.API_MAX_RESULTS, self.max_results),\n order=self.sort_by,\n textFormat=\"plainText\",\n )\n\n while request and results_count < self.max_results:\n response = request.execute()\n\n for item in response.get(\"items\", []):\n if results_count >= self.max_results:\n break\n\n comments = self._process_comment(\n item, include_metrics=self.include_metrics, include_replies=self.include_replies\n )\n comments_data.extend(comments)\n results_count += 1\n\n # Get the next page if available and needed\n if \"nextPageToken\" in response and results_count < self.max_results:\n request = youtube.commentThreads().list(\n part=\"snippet,replies\",\n videoId=video_id,\n maxResults=min(self.API_MAX_RESULTS, self.max_results - results_count),\n order=self.sort_by,\n textFormat=\"plainText\",\n pageToken=response[\"nextPageToken\"],\n )\n else:\n request = None\n\n # Define column order\n column_order = [\n \"video_id\",\n \"video_url\",\n \"comment_id\",\n \"parent_comment_id\",\n \"is_reply\",\n \"author\",\n \"author_channel_url\",\n \"text\",\n \"published_at\",\n \"updated_at\",\n ]\n\n if self.include_metrics:\n column_order.extend([\"like_count\", \"reply_count\"])\n\n # Handle empty comments case\n if not comments_data:\n # Create empty DataFrame with proper columns\n comments_df = pd.DataFrame(columns=column_order)\n else:\n # Convert to DataFrame\n comments_df = pd.DataFrame(comments_data)\n\n # Add video metadata\n comments_df[\"video_id\"] = video_id\n comments_df[\"video_url\"] = self.video_url\n\n # Reorder columns\n comments_df = comments_df[column_order]\n\n return DataFrame(comments_df)\n\n except HttpError as e:\n error_message = f\"YouTube API error: {e!s}\"\n if e.resp.status == self.COMMENTS_DISABLED_STATUS:\n error_message = \"Comments are disabled for this video or API quota exceeded.\"\n elif e.resp.status == self.NOT_FOUND_STATUS:\n error_message = \"Video not found.\"\n\n return DataFrame(pd.DataFrame({\"error\": [error_message]}))\n"
},
"include_metrics": {
"_input_type": "BoolInput",
Expand Down
2 changes: 1 addition & 1 deletion src/lfx/src/lfx/_assets/component_index.json

Large diffs are not rendered by default.

24 changes: 15 additions & 9 deletions src/lfx/src/lfx/components/youtube/comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,14 +193,7 @@ def get_video_comments(self) -> DataFrame:
else:
request = None

# Convert to DataFrame
comments_df = pd.DataFrame(comments_data)

# Add video metadata
comments_df["video_id"] = video_id
comments_df["video_url"] = self.video_url

# Sort columns for better organization
# Define column order
column_order = [
"video_id",
"video_url",
Expand All @@ -217,7 +210,20 @@ def get_video_comments(self) -> DataFrame:
if self.include_metrics:
column_order.extend(["like_count", "reply_count"])

comments_df = comments_df[column_order]
# Handle empty comments case
if not comments_data:
# Create empty DataFrame with proper columns
comments_df = pd.DataFrame(columns=column_order)
else:
# Convert to DataFrame
comments_df = pd.DataFrame(comments_data)

# Add video metadata
comments_df["video_id"] = video_id
comments_df["video_url"] = self.video_url

# Reorder columns
comments_df = comments_df[column_order]

return DataFrame(comments_df)

Expand Down
Loading