Skip to content

Commit c2dc695

Browse files
committed
modified scraper
1 parent 276da32 commit c2dc695

3 files changed

Lines changed: 11 additions & 2 deletions

File tree

dockerfile

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ WORKDIR /app
25 25
ENV PYTHONUNBUFFERED=1
26 26
ENV PYTHONDONTWRITEBYTECODE=1
27 27
ENV PORT=8080
28 +
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
28 29
#ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
29 30
# ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
30 31
# ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ langchain_core==0.3.66
10 10
#langchain_google_vertexai==2.0.26
11 11
langgraph==0.5.0
12 12
Pillow==11.2.1
13 -
playwright==1.50.0
13 +
#playwright==1.50.0
14 14
protobuf==3.20.3
15 15
python-dotenv==1.1.1
16 16
streamlit==1.46.0

scraper.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,9 @@ async def scrape_content(self, state: WorkflowState) -> WorkflowState:
52 52
browser = await p.chromium.launch(headless=True,
53 53
args = [
54 54
"--no-sandbox",
55 +
"--disable-setuid-sandbox",
56 +
"--no-zygote",
57 +
"--single-process",
55 58
"--disable-dev-shm-usage",
56 59
"--disable-gpu"
57 60
])
@@ -108,7 +111,12 @@ async def scrape_content(self, state: WorkflowState) -> WorkflowState:
108 111

109 112
except Exception as e:
110 113
print(f"Error scraping content: {e}")
111 -
return None
114 +
#return None
115 +
return {
116 +
**state,
117 +
"status": "scrape_failed",
118 +
"error": str(e)
119 +
}
112 120

113 121
finally:
114 122
# close the browser

0 commit comments

Comments
 (0)