# Standard libraries
import asyncio
import logging
import json

# Installed libraries
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.common.proxy import ProxyType, Proxy
from selenium.webdriver.common.by import By
from selenium import webdriver
from apify import Actor


async def main() -> None:
    """Fetch the public IP through ten freshly generated proxy URLs and log it.

    For each of ten iterations a new proxy URL is requested from the
    RESIDENTIAL proxy group, a headless Chrome instance is started with that
    proxy, ``https://api.ipify.org?format=json`` is loaded, and the ``ip``
    field of the JSON shown in the page's ``<pre>`` element is logged.

    Timeouts, WebDriver errors and JSON decoding errors are logged with a
    traceback and do not stop the remaining iterations.
    """
    async with Actor:
        Actor.log.setLevel(logging.DEBUG)
        proxy_config = await Actor.create_proxy_configuration(groups=['RESIDENTIAL'])
        if proxy_config is None:
            # create_proxy_configuration is typed as Optional; without a
            # configuration there is nothing to test, so stop early instead of
            # failing with AttributeError on new_url().
            Actor.log.error('Proxy configuration is not available')
            return

        url = "https://api.ipify.org?format=json"

        for _ in range(10):
            proxy = await proxy_config.new_url()
            Actor.log.info(f'Using proxy: {proxy}')

            chrome_options = ChromeOptions()
            chrome_options.add_argument("--headless")
            chrome_options.add_argument("--no-sandbox")
            chrome_options.add_argument("--disable-dev-shm-usage")
            # The target URL is HTTPS, so the proxy must also be registered as
            # 'sslProxy'; with only 'httpProxy' set, HTTPS requests have no
            # proxy entry to use.
            # NOTE(review): if the proxy URL embeds credentials, Chrome may not
            # apply them from this setting — confirm against the IP that is logged.
            chrome_options.proxy = Proxy({
                'proxyType': ProxyType.MANUAL,
                'httpProxy': proxy,
                'sslProxy': proxy,
            })

            try:
                # The context manager quits the browser even when loading fails.
                with webdriver.Chrome(options=chrome_options) as driver:
                    driver.set_page_load_timeout(20)
                    driver.get(url)
                    # Chrome renders a raw JSON response inside a <pre> element.
                    content = driver.find_element(By.TAG_NAME, 'pre').text
                    ip = json.loads(content).get("ip")
                    Actor.log.info(f"IP = {ip}")
            except (TimeoutException, WebDriverException, json.JSONDecodeError):
                Actor.log.exception("An error occured")
Failed to compile
./node_modules/.pnpm/@crawlee+puppeteer@3.13.0_playwright@1.50.1/node_modules/@crawlee/puppeteer/internals/utils/puppeteer_utils.js:224:22
Module not found: Can't resolve 'puppeteer/package.json'
222 | return client.send(command, ...args);
223 | }
> 224 | const jsonPath = require.resolve('puppeteer/package.json');
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
225 | const parsed = JSON.parse(await (0, promises_1.readFile)(jsonPath, 'utf-8'));
226 | throw new Error(
Cannot detect CDP client for Puppeteer ${parsed.version}. You should report this to Crawlee, mentioning the puppeteer version you are using.);
227 | }
https://nextjs.org/docs/messages/module-not-found
import { PlaywrightCrawler, Dataset } from 'crawlee'; const crawler = new PlaywrightCrawler({ preNavigationHooks: [ async ({ page, request, log }) => { if (playwrightCookies.length > 0) { log.info(`Setting ${playwrightCookies.length} cookies for ${request.url}...`); await page.context().addCookies(playwrightCookies); } }, ], launchContext: { launchOptions: { headless: true, }, }, async requestHandler({ page, request, log }) { log.info(`Processing ${request.url}...`); const startTime = Date.now(); if (waitForSelector) { try { await page.waitForSelector(waitForSelector, { timeout: 60000 }); } catch (error) { log.info(`Selector "${waitForSelector}" not detected after 1 minute. Continuing...`); } }
There was an uncaught exception during the run of the Actor and it was not handled.
// Puppeteer-based crawler: headless Chrome launched with container-friendly
// flags, one retry per request and a 60-second navigation timeout.
const crawler = new PuppeteerCrawler({
  launchContext: {
    launchOptions: {
      headless: true,
      // Each flag is listed once; the original passed
      // "--ignore-certificate-errors" twice.
      args: [
        "--no-sandbox", // Mitigates the "sandboxed" process issue in Docker containers,
        "--ignore-certificate-errors",
        "--disable-dev-shm-usage",
        "--disable-infobars",
        "--disable-extensions",
        "--disable-setuid-sandbox",
        "--disable-gpu", // Mitigates the "crashing GPU process" issue in Docker containers
      ],
    },
  },
  maxRequestRetries: 1,
  navigationTimeoutSecs: 60,
  // NOTE(review): a floor of 30 concurrent pages is aggressive — confirm the
  // host has enough memory/CPU before keeping this value.
  autoscaledPoolOptions: { minConcurrency: 30 },
  maxSessionRotations: 5,
  preNavigationHooks: [
    async ({ blockRequests }, goToOptions) => {
      if (goToOptions) goToOptions.waitUntil = "domcontentloaded"; // Set waitUntil here
      await blockRequests({
        urlPatterns: [
          // NOTE(review): the pattern list was elided ("...") in the original
          // snippet — restore the real patterns here.
        ],
      });
    },
  ],
  proxyConfiguration,
  requestHandler: router,
});

await crawler.run(startUrls);
await Actor.exit();
stringList
doesn't work on web console.{ "title" : "Test", "type" : "object", "schemaVersion" : 1, "properties" : { "search.location": {"title": "Locations #1", "type": "array", "description":"", "editor":"stringList", "prefill": ["Bandung"]}, ### <-- Problem "search_location": {"title": "Locations #2", "type": "array", "description":"", "editor":"stringList", "prefill": ["Bandung"]} } }
+Add
button. When edited using Bulk
button, the resulting Json
is weird: it automatically becomes an object structure, which is a nice effect. Not sure if this is really a bug or a new feature?