I fetch data in my Next.js app from Sanity to create dynamic routes.
Like this:
export const getStaticPaths = async () => {
  // client.fetch resolves directly to the data, so a single await is enough.
  const data = await client.fetch(`*[_type in ["work"]]`);
  const paths = data.map((e) => {
    return {
      params: { slug: e.slug.current },
    };
  });
  return {
    paths,
    fallback: false,
  };
};
export const getStaticProps = async (context) => {
  const slug = context.params.slug;
  const data = await client.fetch(`*[_type == "work" && slug.current == "${slug}"]`);
  const dataAll = await client.fetch(`*[_type == "work"] | order(order asc)`);
  return {
    props: {
      post: data[0],
      dataAll,
    },
    revalidate: 1, // re-generate at most once per second
  };
};
On localhost everything works fine and fast, but on Netlify I get a 404 error for every newly generated route.
Only after a redeploy does the page show up.
My directory looks like this:
-works
----[slug].jsx
----index.jsx
Why doesn't Netlify recognize the new path right away?
Also, every change to existing content made via Sanity takes rather long to show up on Netlify.
I tried creating a build hook in Netlify and listening for content changes, so that a build is triggered on the server every time new content is added.
This seems like a hacky workaround, though. There must be a simpler way to get this to work.
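For reference, that workaround is small: a Netlify build hook is a URL that triggers a build on an empty POST, so a Sanity webhook can point at the hook URL directly, or go through a tiny function like this hypothetical one (the hook ID is a placeholder):

const fetch = require('node-fetch');

// Hypothetical Netlify Function that a Sanity webhook calls.
exports.handler = async () => {
  await fetch('https://api.netlify.com/build_hooks/YOUR_HOOK_ID', { method: 'POST' });
  return { statusCode: 200, body: 'Build triggered' };
};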
Possible Solution:
If I change fallback from false to true, it works on Netlify without the 404.
It breaks on localhost, though, while working on the live server.
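With fallback: true, Next.js serves a fallback render of the page while it generates the new path in the background, so the component needs to handle the case where the props aren't there yet (which may be what breaks locally); fallback: 'blocking' avoids that branch by holding the request until the page is built. A minimal sketch of the check in [slug].jsx (the rendered field is a placeholder):

import { useRouter } from 'next/router';

const Work = ({ post, dataAll }) => {
  const router = useRouter();
  // Rendered while the page for a new slug is still being generated.
  if (router.isFallback) {
    return <div>Loading…</div>;
  }
  return <h1>{post.title}</h1>; // post.title is a placeholder field
};

export default Work;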
Related
I'm trying to crawl several web pages to check for broken links and write the results to JSON files. However, after the first file is completed, the app crashes with no error popping up...
I'm using Puppeteer to crawl, Bluebird to run each link concurrently, and fs to write the files.
What I've tried:
Switching the file type to '.txt' or '.php'. This works, but I would need another loop outside the current workflow to convert the files from '.txt' to '.json'. Renaming the file right after writing to it also causes the app to crash.
Using try/catch statements around fs.writeFile, but it never throws an error.
Running the entire app outside of Express. This worked at some point, but I'm trying to use it within the framework.
const express = require('express');
const router = express.Router();
const puppeteer = require('puppeteer');
const bluebird = require("bluebird");
const fs = require('fs');

router.get('/', function(req, res, next) {
  (async () => {
    // Our (multiple) URLs.
    const urls = ['https://www.testing.com/allergy-test/', 'https://www.testing.com/genetic-testing/'];

    // Launch a browser, run the callback, and always close the browser afterwards.
    const withBrowser = async (fn) => {
      const browser = await puppeteer.launch();
      try {
        return await fn(browser);
      } finally {
        await browser.close();
      }
    };

    // Open a page, run the callback, and always close the page afterwards.
    const withPage = (browser) => async (fn) => {
      const page = await browser.newPage();
      // Turn the request interceptor on.
      await page.setRequestInterception(true);
      // Ignore all the asset requests, just get the document.
      page.on('request', request => {
        if (request.resourceType() === 'document') {
          request.continue();
        } else {
          request.abort();
        }
      });
      try {
        return await fn(page);
      } finally {
        await page.close();
      }
    };

    const results = await withBrowser(async (browser) => {
      return bluebird.map(urls, async (url) => {
        return withPage(browser)(async (page) => {
          await page.goto(url, {
            waitUntil: 'domcontentloaded',
            timeout: 0 // Removes the timeout.
          });
          // Search for the urls we want to "crawl".
          const hrefs = await page.$$eval('a[href^="https://www.testing.com/"]', as => as.map(a => a.href));
          // Predefine our arrays.
          let links = [];
          let redirect = [];
          // Loop through each url found on the page.
          for (const href of hrefs) {
            const response = await page.goto(href, {
              waitUntil: 'domcontentloaded',
              timeout: 0 // Removes the timeout.
            });
            const chain = response.request().redirectChain();
            const link = {
              'source_url': href,
              'status': response.status(),
              'final_url': response.url(),
              'redirect_count': chain.length,
            };
            // Loop through the redirect chain for each href.
            // (Collected here, but not currently written to the output file.)
            for (const ch of chain) {
              redirect.push({
                status: ch.response().status(),
                url: ch.url(),
              });
            }
            // Push all the info on the target link into links.
            links.push(link);
          }
          // JSONify the data.
          const linksJson = JSON.stringify(links);
          let fileName = url.replace('https://www.testing.com/', '');
          fileName = fileName.replace(/[^a-zA-Z0-9\-]/g, '');
          // Write the data to a file in the local ./tmp directory.
          fs.writeFile(`./tmp/${fileName}.json`, linksJson, (err) => {
            if (err) {
              return console.log(err);
            }
          });
        });
      }, { concurrency: 4 }); // How many pages to run at a time.
    });
  })();
});

module.exports = router;
UPDATE:
So there is nothing wrong with my code... I realized nodemon was stopping the process after each file was saved. Since nodemon would detect a "file change", it kept restarting my server after the first item was written.
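If anyone hits the same thing: nodemon can be told not to watch the output directory, so the JSON writes don't trigger a restart. A minimal sketch, assuming the files land in ./tmp as above; either pass the flag on the command line:

nodemon --ignore 'tmp/*'

or add an ignore list to a nodemon.json next to package.json:

{
  "ignore": ["tmp/*"]
}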
I'm using the Node.js Octokit API, and for our business analytics I'm trying to gather a list of all commits across all repositories so I can make a little "ranking".
const owner = req.params.owner;
const { data } = await octokit.request('GET /user/repos', { type: 'private' })
let total = 0;
data.forEach(element => {
const name = element.name;
octokit.repos.listCommits({
owner,
name,
}).then(r => {
total += r.data.length;
}).catch(error => console.log(error));
})
console.log(total);
I tried something like this, but it catches loads of 404 errors, I think because the repos are private.
I guess you have solved it by now. How did you initialize Octokit?
Did you use the correct token?
const octokit = new Octokit({ auth: <githubToken> });
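Aside from the token, two details in the snippet above are worth flagging: octokit.repos.listCommits expects the repository parameter to be called repo (not name), and total is logged before any of the requests resolve. A rough, untested sketch of both fixes, assuming @octokit/rest and that the listed repos belong to the authenticated user:

const { Octokit } = require('@octokit/rest');
const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN }); // token name is a placeholder

async function countCommits() {
  const { data: repos } = await octokit.request('GET /user/repos', { type: 'private' });
  // Wait for every per-repo request before summing.
  const counts = await Promise.all(
    repos.map((repo) =>
      octokit.repos
        .listCommits({ owner: repo.owner.login, repo: repo.name })
        // Note: this only counts the first page of commits; octokit.paginate can fetch them all.
        .then((r) => r.data.length)
        .catch(() => 0) // e.g. empty repositories respond with an error
    )
  );
  return counts.reduce((sum, n) => sum + n, 0);
}

countCommits().then((total) => console.log(total));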
I have a client who needs his PDFs to be readable in the browser, without the user having to download them first. It turned out not to be an option to do this through WordPress, so I thought I could download them in Gatsby before every build, if they don't already exist, and I was wondering whether this is possible.
I found this repo: https://github.com/jamstack-cms/jamstack-ecommerce
that shows a way to do it with this code:
// Modules the helpers below rely on.
const path = require('path')
const fs = require('fs')
const axios = require('axios')

function getImageKey(url) {
const split = url.split('/')
const key = split[split.length - 1]
const keyItems = key.split('?')
const imageKey = keyItems[0]
return imageKey
}
function getPathName(url, pathName = 'downloads') {
let reqPath = path.join(__dirname, '..')
let key = getImageKey(url)
key = key.replace(/%/g, "")
const rawPath = `${reqPath}/public/${pathName}/${key}`
return rawPath
}
async function downloadImage (url) {
return new Promise(async (resolve, reject) => {
const path = getPathName(url)
const writer = fs.createWriteStream(path)
const response = await axios({
url,
method: 'GET',
responseType: 'stream'
})
response.data.pipe(writer)
writer.on('finish', resolve)
writer.on('error', reject)
})
}
But it doesn't seem to work if I put it in my createPages, and I can't use it outside of it either, because I don't have access to graphql to query the data first.
Any idea how to do this?
The WordPress source example's createPages is defined as async:
exports.createPages = async ({ graphql, actions }) => {
... so you can already use await to download your file(s) just after querying the data (and before the createPage() call). It should (NOT TESTED) be as easy as:
// Check for any errors
if (result.errors) {
console.error(result.errors)
}
// Access query results via object destructuring
const { allWordpressPage, allWordpressPost } = result.data
const pageTemplate = path.resolve(`./src/templates/page.js`)
// Use a for...of loop (rather than forEach) so that await is valid here.
for (const edge of allWordpressPage.edges) {
// for one file per edge
// url taken/constructed from some edge property
await downloadImage(url);
createPage({
Of course, for multiple files you should use Promise.all to wait for all the returned download promises to resolve before creating the page:
// Again a for...of loop so that await works.
for (const edge of allWordpressPage.edges) {
// for multiple files per edge (page)
// urls taken/constructed from some edge properties in a loop
// adapt the 'path' of the iterable (edge.xxx.yyy...)
// and/or the downloadImage(image) argument, e.g. 'image.someUrl'
await Promise.all(
edge.node.someImageArrayNode.map(image => { return downloadImage(image); })
);
createPage({
If you need to pass/update the image nodes (for use in components), you should be able to mutate the nodes, e.g.:
await Promise.all(
edge.node.someImageArrayNode.map( image => {
image["fullUrl"] = `/publicPath/${image.url}`;
return downloadImage(image.url); // return Promise at the end
})
);
createPage({
path: slugify(item.name),
component: ItemView,
context: {
content: item,
title: item.name,
firstImageUrl: edge.node.someImageArrayNode[0].fullUrl,
images: edge.node.someImageArrayNode
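Pieced together, a minimal (untested) gatsby-node.js sketch along these lines could look as follows; allWordpressPost, the fileUrl field, the template path, and the slug handling are placeholders to adapt to the real schema, and downloadImage/getImageKey are the helpers quoted above:

const path = require('path')

exports.createPages = async ({ graphql, actions }) => {
  const { createPage } = actions

  // Placeholder query/fields; adapt to the real schema.
  const result = await graphql(`
    {
      allWordpressPost {
        edges {
          node {
            slug
            fileUrl
          }
        }
      }
    }
  `)

  if (result.errors) {
    console.error(result.errors)
    return
  }

  const template = path.resolve('./src/templates/post.js')

  for (const { node } of result.data.allWordpressPost.edges) {
    // Download the PDF into /public before the page is created,
    // so it is served as a static file at build time.
    await downloadImage(node.fileUrl)

    createPage({
      path: `/${node.slug}/`,
      component: template,
      context: {
        slug: node.slug,
        localFileUrl: `/downloads/${getImageKey(node.fileUrl)}`,
      },
    })
  }
}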
I'm trying to create an API call to the external Adobe Stock service.
Like the title says, the first time I get the query from the Link router it is undefined, but after reloading the page it works correctly.
My main page:
<Link
href={{
pathname: "/kategoria-zdjec",
query: images.zdjecia_kategoria
}}
as={`/kategoria-zdjec?temat=${images.zdjecia_kategoria}`}
className={classes.button}>
</Link>
And my server:
app
.prepare()
.then(() => {
server.get("/kategoria-zdjec", async (req, res) => {
const temat = await req.query.temat;
console.log(temat)
const url = `https://stock.adobe.io/Rest/Media/1/Search/Files?locale=pl_PL&search_parameters[words]=${temat}&search_parameters[limit]=24&search_parameters[offset]=1`;
try {
const fetchData = await fetch(url, {
headers: { ... }
});
const objectAdobeStock = await fetchData.json();
res.json(objectAdobeStock);
const totalObj = await objectAdobeStock.nb_results;
const adobeImages = await objectAdobeStock.files;
} catch (error) {
console.log(error);
}
});
And this is getInitialProps on the next page:
Zdjecia.getInitialProps = async ({req}) => {
const res = await fetch("/kategoria-zdjec");
const json = await res.json();
return { total: json.nb_results, images: json.files };
}
I think the problem is due to something asynchronous.
I think this might be due to the fact that you are using fetch, which is actually part of the Web API, and that call fails when executed on the server.
You could either use isomorphic-fetch, which keeps the fetch API consistent between client and server, or use node-fetch when fetch is called on the server:
Zdjecia.getInitialProps = async ({ req }) => {
const isServer = !!req; // req is only defined on the server
const fetch = isServer ? require('node-fetch') : window.fetch;
const res = await fetch("/kategoria-zdjec");
const json = await res.json();
return { total: json.nb_results, images: json.files };
}
This problem is solved. The issue was in another part of my app, in the state management: I just created new variables and passed the state value to the Link.
I am using p-queue with Puppeteer. The goal is to run X Chrome instances, with p-queue limiting the concurrency. When an exception occurs within a queued task, I would like to requeue it, but when I do that the queue stops.
I have the following:
getAccounts is simply a helper method that parses a JSON file. For every entry, I create a task and submit it to the queue.
async init() {
let accounts = await this.getAccounts();
accounts.map(async () => {
await queue.add(() => this.test());
});
await queue.onIdle();
console.log("ended, with count: " + this._count)
}
The test method:
async test() {
this._count++;
const browser = await puppeteer.launch({headless: false});
try {
const page = await browser.newPage();
await page.goto(this._url);
if (Math.floor(Math.random() * 10) > 4) {
throw new Error("Simulate error");
}
await browser.close();
} catch (error) {
await browser.close();
await queue.add(() => this.test());
console.log(error);
}
}
If I run this without the await queue.add(() => this.test()); in the catch block, it runs fine and limits the concurrency to 3. But with it, whenever execution lands in the catch, the current Chrome instance stops.
It also does not log the error, nor the console.log("ended, with count: " + this._count).
Is this a bug in the module, or am I doing something wrong?
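For what it's worth, one way to avoid re-adding work from inside a failing task is to keep the retry loop inside a single queued task and cap the attempts; a rough sketch of such a wrapper on the same class (the method name and maxAttempts are made up, and test() would throw instead of catching):

async addWithRetry(task, maxAttempts = 3) {
  return queue.add(async () => {
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        return await task();
      } catch (error) {
        console.log(`attempt ${attempt} failed:`, error);
        if (attempt === maxAttempts) throw error; // give up after the last attempt
      }
    }
  });
}

// Usage inside init():
// await this.addWithRetry(() => this.test());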
I recommend checking out the Apify SDK package, where you can simply use one of its helper classes to manage Puppeteer pages/browsers.
PuppeteerPool:
It manages browser instances for you. If you set one page per browser, each new page will create a new browser instance.
const puppeteerPool = new PuppeteerPool({
maxOpenPagesPerInstance: 1,
});
const page1 = await puppeteerPool.newPage();
const page2 = await puppeteerPool.newPage();
const page3 = await puppeteerPool.newPage();
// ... do something with the pages ...
// Close all browsers.
await puppeteerPool.destroy();
Or PuppeteerCrawler, which is more powerful, with several options and helpers. You can manage the whole Puppeteer crawl there. You can check the PuppeteerCrawler example.
edit:
Example of using PuppeteerCrawler with a concurrency of 10:
const Apify = require('apify');
Apify.main(async () => {
// Apify.openRequestQueue() is a factory to get a preconfigured RequestQueue instance.
// We add our first request to it - the initial page the crawler will visit.
const requestQueue = await Apify.openRequestQueue();
await requestQueue.addRequest({ url: 'https://news.ycombinator.com/' }); // Adds URLs you want to process
// Create an instance of the PuppeteerCrawler class - a crawler
// that automatically loads the URLs in headless Chrome / Puppeteer.
const crawler = new Apify.PuppeteerCrawler({
requestQueue,
maxConcurrency: 10, // Set max concurrency
puppeteerPoolOptions: {
maxOpenPagesPerInstance: 1, // Set up just one page for one browser instance
},
// The function accepts a single parameter, which is an object with the following fields:
// - request: an instance of the Request class with information such as URL and HTTP method
// - page: Puppeteer's Page object (see https://pptr.dev/#show=api-class-page)
handlePageFunction: async ({ request, page }) => {
// Code you want to process on each page
},
// This function is called if the page processing failed more than maxRequestRetries+1 times.
handleFailedRequestFunction: async ({ request }) => {
// Code you want to process when handlePageFunction failed
},
});
// Run the crawler and wait for it to finish.
await crawler.run();
console.log('Crawler finished.');
});
Example of using RequestList:
const Apify = require('apify');
Apify.main(async () => {
const requestList = new Apify.RequestList({
sources: [
// Separate requests
{ url: 'http://www.example.com/page-1' },
{ url: 'http://www.example.com/page-2' },
// Bulk load of URLs from file `http://www.example.com/my-url-list.txt`
{ requestsFromUrl: 'http://www.example.com/my-url-list.txt', userData: { isFromUrl: true } },
],
persistStateKey: 'my-state',
persistSourcesKey: 'my-sources',
});
// This call loads and parses the URLs from the remote file.
await requestList.initialize();
const crawler = new Apify.PuppeteerCrawler({
requestList,
maxConcurrency: 10, // Set max concurrency
puppeteerPoolOptions: {
maxOpenPagesPerInstance: 1, // Set up just one page for one browser instance
},
// The function accepts a single parameter, which is an object with the following fields:
// - request: an instance of the Request class with information such as URL and HTTP method
// - page: Puppeteer's Page object (see https://pptr.dev/#show=api-class-page)
handlePageFunction: async ({ request, page }) => {
// Code you want to process on each page
},
// This function is called if the page processing failed more than maxRequestRetries+1 times.
handleFailedRequestFunction: async ({ request }) => {
// Code you want to process when handlePageFunction failed
},
});
// Run the crawler and wait for it to finish.
await crawler.run();
console.log('Crawler finished.');
});