Hi @qqilihq,
Sometimes, when scraping websites, content is only loaded when scrolling down (lazy loading via JS or natively). I have created a JavaScript snippet that emulates fairly human-like scroll behavior via:
- Incremental scroll steps
- Jitter and spread per step
- Option to choose between absolute and relative scroll down
- Timeout in case of a UI issue
- Return value with some details for debugging
However, it is far from working perfectly: on some occasions, strange website implementations of sticky menus (my assumption) trigger a few scroll iterations and then cause the script to run forever.
Therefore, I’d like to propose creating a scroll node with the aforementioned features, or some of them. It might also become quite useful when combined with the Find Element node to scroll horizontally through a slideshow.
My script follows. Since I am not a full-time JS developer, maybe someone else has ideas for improvement or can spot obvious mistakes I missed.
/**
 * Scrolls the window upward in small, randomised steps until it reaches a
 * target position measured from the bottom of the page.
 *
 * The target is recomputed before every step from the current
 * `document.body.scrollHeight`, so it follows the page if its height changes
 * while scrolling. If the window is already at or above the target, nothing
 * is scrolled and the promise resolves immediately.
 *
 * @param {number} targetScroll - Distance from the bottom of the scrollable
 *   range at which to stop: pixels by default, or a percentage of the
 *   scrollable height when `isPercent` is true.
 * @param {boolean} [isPercent=false] - Interpret `targetScroll` as a percentage.
 * @param {number} [iteration=0] - Starting value of the step counter that is
 *   reported in the result.
 * @returns {Promise<string>} A `||`-separated summary containing the number of
 *   successful scroll steps, the final scroll position and the target
 *   position. The same format is used whether the target was reached or the
 *   no-progress timeout fired.
 */
async function scrollIncrementally(targetScroll, isPercent = false, iteration = 0) {
  const jitterThreshold = 50; // Minimum size of a single scroll step (px)
  const spread = 50; // Upper bound of the random extra added to each step (px)
  const timeoutThreshold = 3000; // Give up after this long without any progress (ms)
  const stepDelay = 500; // Time granted to the smooth scroll before re-checking (ms)

  let completedSteps = iteration;
  // Must outlive the individual steps: resetting it per step would keep the
  // elapsed time below the threshold forever and the timeout could never fire.
  let lastProgressTime = Date.now();

  const describe = (currentScrollY, targetPosition) =>
    `Finished Scrolling||Total Iterations: ${completedSteps}||Current Scroll Position: ${currentScrollY}||Target Scroll Position: ${targetPosition}`;

  for (;;) {
    const maxScrollPosition = document.body.scrollHeight - window.innerHeight;
    const requestedPosition = isPercent
      ? maxScrollPosition - (maxScrollPosition * targetScroll) / 100
      : maxScrollPosition - targetScroll;
    // The page cannot scroll above its top, so a negative target is unreachable.
    const targetPosition = Math.max(0, requestedPosition);

    const previousScrollY = window.scrollY;
    if (previousScrollY <= targetPosition) {
      return describe(previousScrollY, targetPosition);
    }

    // Jittered step, capped at the remaining distance so we never overshoot.
    const remainingDistance = previousScrollY - targetPosition;
    const jitteredScrollStep = jitterThreshold + Math.floor(Math.random() * (spread + 1));
    const scrollStep = Math.min(jitteredScrollStep, remainingDistance);

    window.scrollTo({
      top: previousScrollY - scrollStep,
      behavior: 'smooth',
    });

    await new Promise((resolve) => {
      setTimeout(resolve, stepDelay);
    });

    const currentScrollY = window.scrollY;
    if (currentScrollY < previousScrollY) {
      // The page moved: count the step and restart the no-progress timer.
      completedSteps += 1;
      lastProgressTime = Date.now();
    } else if (Date.now() - lastProgressTime >= timeoutThreshold) {
      // The page has not moved for too long (e.g. something blocks scrolling).
      return describe(currentScrollY, targetPosition);
    }
  }
}
// Use async/await to handle the promise
/**
 * Example driver: runs one scroll pass and prints its summary.
 *
 * Requests a stop 500 pixels above the bottom of the page (absolute mode).
 * For a percentage-based stop use e.g. `scrollIncrementally(20, true)`,
 * which stops 20% above the bottom. Any failure is logged rather than
 * propagated.
 */
async function runScrolling() {
  try {
    console.log(await scrollIncrementally(500, false));
  } catch (scrollError) {
    console.error(scrollError);
  }
}
// Kick off the scroll run. The returned promise is intentionally not awaited:
// runScrolling() catches and logs its own errors.
void runScrolling();
Best
Mike