Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c1cead0
Merge pull request #982 from ScrapeGraphAI/pre/beta
VinciGit00 Jun 6, 2025
3322f9d
Update README.md
VinciGit00 Jun 6, 2025
30e6b59
ci(release): 1.54.0 [skip ci]
semantic-release-bot Jun 6, 2025
e846a14
fix: bug on generate answer node
VinciGit00 Jun 6, 2025
38b3997
ci(release): 1.54.1 [skip ci]
semantic-release-bot Jun 6, 2025
cd29791
feat: add adv
VinciGit00 Jun 7, 2025
8c54162
feat: update logs
VinciGit00 Jun 7, 2025
27d5096
Merge pull request #983 from ScrapeGraphAI/add-adv
VinciGit00 Jun 7, 2025
17d9a72
ci(release): 1.55.0 [skip ci]
semantic-release-bot Jun 7, 2025
2a73821
Update README.md
VinciGit00 Jun 9, 2025
94e9ebd
feat: add scrapegraphai integration
VinciGit00 Jun 13, 2025
3f64f88
ci(release): 1.56.0 [skip ci]
semantic-release-bot Jun 13, 2025
7340375
feat: add markdownify endpoint
VinciGit00 Jun 13, 2025
e4ba4e2
Merge branch 'main' of https://github.com/ScrapeGraphAI/Scrapegraph-ai
VinciGit00 Jun 13, 2025
9a2c02d
ci(release): 1.57.0 [skip ci]
semantic-release-bot Jun 13, 2025
1d1e4db
Update README.md
VinciGit00 Jun 16, 2025
07dec35
docs: add links to other language versions of README
dowithless Jun 16, 2025
273c7d1
Merge pull request #987 from dowithless/patch-1
VinciGit00 Jun 16, 2025
0c2481f
feat: add new oss link
VinciGit00 Jun 21, 2025
aa72708
Merge branch 'main' of https://github.com/ScrapeGraphAI/Scrapegraph-ai
VinciGit00 Jun 21, 2025
45ad464
ci(release): 1.58.0 [skip ci]
semantic-release-bot Jun 21, 2025
288c69a
feat: removed sposnsors
VinciGit00 Jun 24, 2025
3f8bc88
Merge branch 'main' of https://github.com/ScrapeGraphAI/Scrapegraph-ai
VinciGit00 Jun 24, 2025
6989e1a
ci(release): 1.59.0 [skip ci]
semantic-release-bot Jun 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: add scrapegraphai integration
  • Loading branch information
VinciGit00 committed Jun 13, 2025
commit 94e9ebd28061f8313bb23074b4db3406cf4db0c9
1 change: 1 addition & 0 deletions examples/search_graph/scrapegraphai/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SCRAPEGRAPH_API_KEY=your-api-key-here
Empty file.
83 changes: 83 additions & 0 deletions examples/search_graph/scrapegraphai/searchscraper_scrapegraphai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Example implementation of search-based scraping using Scrapegraph AI.
This example demonstrates how to use the searchscraper to extract information from the web.
"""

import os
from typing import Dict, Any
from dotenv import load_dotenv
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

def format_response(response: Dict[str, Any]) -> None:
    """
    Format and print the search response in a readable way.

    Args:
        response (Dict[str, Any]): The response from the search API.
            Expected keys: 'request_id', 'result', and optionally
            'reference_urls' (a list of source URLs).
    """
    print("\n" + "=" * 50)
    print("SEARCH RESULTS")
    print("=" * 50)

    # Use .get() throughout so a partial/malformed response prints a
    # placeholder instead of raising KeyError (reference_urls was already
    # accessed defensively; request_id/result now match that behavior).
    print(f"\nRequest ID: {response.get('request_id')}")

    # Print number of sources
    urls = response.get('reference_urls', [])
    print(f"\nSources Processed: {len(urls)}")

    # Print the extracted information
    print("\nExtracted Information:")
    print("-" * 30)
    result = response.get('result')
    if isinstance(result, dict):
        for key, value in result.items():
            print(f"\n{key.upper()}:")
            if isinstance(value, list):
                for item in value:
                    print(f"  • {item}")
            else:
                print(f"  {value}")
    else:
        print(result)

    # Print source URLs, numbered from 1
    if urls:
        print("\nSources:")
        print("-" * 30)
        for i, url in enumerate(urls, 1):
            print(f"{i}. {url}")
    print("\n" + "=" * 50)

def main():
    """Run a basic search-scraper query and pretty-print its response."""
    # Pull configuration (the API key) from a local .env file, if one exists.
    load_dotenv()

    # Fail fast when the key is missing rather than erroring inside the client.
    api_key = os.getenv("SCRAPEGRAPH_API_KEY")
    if not api_key:
        raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")

    # Emit informational logs from the scrapegraph client library.
    sgai_logger.set_logging(level="INFO")

    client = Client(api_key=api_key)
    try:
        print("\nSearching for information...")
        search_response = client.searchscraper(user_prompt="Extract webpage information")
        format_response(search_response)
    except Exception as e:
        print(f"\nError occurred: {str(e)}")
    finally:
        # Release the client's network resources on success or failure alike.
        client.close()


if __name__ == "__main__":
    main()
30 changes: 0 additions & 30 deletions examples/smart_scraper_graph/README.md

This file was deleted.

1 change: 1 addition & 0 deletions examples/smart_scraper_graph/scrapegraphai/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SCRAPEGRAPH_API_KEY=your-api-key-here
148 changes: 148 additions & 0 deletions examples/smart_scraper_graph/scrapegraphai/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Smart Scraper Examples with Scrapegraph AI

This repository contains examples demonstrating how to use Scrapegraph AI's powerful web scraping capabilities to transform websites into structured data using natural language prompts.

## About Scrapegraph AI

[Scrapegraph AI](https://scrapegraphai.com) is a powerful web scraping API that transforms any website into structured data for AI agents and analytics. It's built specifically for AI agents and LLMs, featuring natural language instructions and structured JSON output.

Key features:
- Universal data extraction from any website
- Intelligent processing with advanced AI
- Lightning-fast setup with official SDKs
- Enterprise-ready with automatic proxy rotation
- Seamless integration with RAG systems

## Examples Included

### 1. Smart Scraper
The `smartscraper_scrapegraphai.py` example demonstrates how to extract structured data from a single website using natural language prompts.

### 2. Search Scraper
The `searchscraper_scrapegraphai.py` example shows how to:
- Search the internet for relevant information
- Extract structured data from multiple sources
- Merge and analyze information from different websites
- Get comprehensive answers to complex queries

## Prerequisites

- Python 3.7+
- pip (Python package manager)

## Installation

1. Clone the repository:
```bash
git clone https://github.com/yourusername/Scrapegraph-ai.git
cd Scrapegraph-ai
```

2. Install required dependencies:
```bash
pip install -r requirements.txt
```

3. Create a `.env` file in the `examples/smart_scraper_graph/scrapegraphai` directory with:
```env
SCRAPEGRAPH_API_KEY=your-api-key-here
```

## Usage

### Smart Scraper Example
```bash
python smartscraper_scrapegraphai.py
```

### Search Scraper Example
```bash
python searchscraper_scrapegraphai.py
```

## Example Outputs

### Smart Scraper Output
```python
Request ID: abc123...
Result: {
"founders": [
{
"name": "Marco Vinciguerra",
"role": "Founder & Software Engineer",
"bio": "LinkedIn profile of Marco Vinciguerra"
},
{
"name": "Lorenzo Padoan",
"role": "Founder & CEO",
"bio": "LinkedIn profile of Lorenzo Padoan"
}
]
}
Reference URLs: ["https://scrapegraphai.com/about"]
```

### Search Scraper Output
```python
Request ID: xyz789...
Number of sources processed: 3

Extracted Information:
{
"features": [
"Universal data extraction",
"Intelligent processing with AI",
"Lightning-fast setup",
"Enterprise-ready with proxy rotation"
],
"benefits": [
"Perfect for AI agents and LLMs",
"Natural language instructions",
"Structured JSON output",
"Seamless RAG integration"
]
}

Sources:
1. https://scrapegraphai.com
2. https://scrapegraphai.com/features
3. https://scrapegraphai.com/docs
```

## Features Demonstrated

- Environment variable configuration
- API client initialization
- Smart scraping with natural language prompts
- Search-based scraping across multiple sources
- Error handling and response processing
- Secure credential management

## Pricing and Credits

Scrapegraph AI offers various pricing tiers:
- Free: 50 credits included
- Starter: $20/month, 5,000 credits
- Growth: $100/month, 40,000 credits
- Pro: $500/month, 250,000 credits
- Enterprise: Custom solutions

Service costs:
- Smart Scraper: 10 credits per webpage
- Search Scraper: 30 credits per query

## Support and Resources

- [Official Documentation](https://scrapegraphai.com/docs)
- [API Status](https://scrapegraphai.com/status)
- Contact: [email protected]

## Security Notes

- Never commit your `.env` file to version control
- Keep your API key secure
- Use environment variables for sensitive credentials

## License

This example is provided under the same license as Scrapegraph AI. See the [Terms of Service](https://scrapegraphai.com/terms) for more information.
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
Example implementation using scrapegraph-py client directly.
"""

import os
from dotenv import load_dotenv
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

def main():
    """
    Run a basic SmartScraper request against scrapegraphai.com and print
    the structured result.

    Raises:
        ValueError: If SCRAPEGRAPH_API_KEY is not set in the environment.
    """
    # Load environment variables (SCRAPEGRAPH_API_KEY) from a local .env file.
    load_dotenv()

    # Get API key from environment variables
    api_key = os.getenv("SCRAPEGRAPH_API_KEY")
    if not api_key:
        # Message was previously in Italian; use English for consistency
        # with the other example scripts in this repository.
        raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")

    # Set up logging
    sgai_logger.set_logging(level="INFO")

    # Initialize the client with the API key from the environment
    sgai_client = Client(api_key=api_key)

    try:
        # SmartScraper request ("informations" typo fixed in the prompt)
        response = sgai_client.smartscraper(
            website_url="https://scrapegraphai.com",
            user_prompt="Extract the founders' information",
        )

        # Print the response
        print(f"Request ID: {response['request_id']}")
        print(f"Result: {response['result']}")
        # reference_urls is optional; print only when present and non-empty.
        if response.get('reference_urls'):
            print(f"Reference URLs: {response['reference_urls']}")

    except Exception as e:
        print(f"Error occurred: {str(e)}")
    finally:
        # Always close the client to release the underlying HTTP session.
        sgai_client.close()


if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies = [
"jsonschema>=4.23.0",
"duckduckgo-search>=7.2.1",
"pydantic>=2.10.2",
"scrapegraph-py>=0.1.0",
]

readme = "README.md"
Expand Down
18 changes: 18 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.