Skip to content

Commit

Permalink
send initialCanonicalUrl in array format to fix crawler issues (#69370)
Browse files Browse the repository at this point in the history
The initial RSC payload currently provides the `initialCanonicalUrl`
value to initialize the router state during the SSR render. However, as
noted in #53274, it seems crawlers like Googlebot will crawl anything
URL-like that they find in the document, regardless of whether or not
it's actually rendered as a link somewhere (which this value isn't).

This splits the `initialCanonicalUrl` value into parts and then joins them
back together when the value is ready to be consumed. That way, when it's
embedded in the document, it's not in a path-like format.


Fixes #53274
Closes NDX-243
  • Loading branch information
ztanner committed Aug 27, 2024
1 parent bca90db commit 7f57d4b
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 11 deletions.
2 changes: 1 addition & 1 deletion packages/next/src/client/app-index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ const pendingActionQueue: Promise<AppRouterActionQueue> = new Promise(
createInitialRouterState({
buildId: initialRSCPayload.b,
initialFlightData: initialRSCPayload.f,
initialCanonicalUrl: initialRSCPayload.c,
initialCanonicalUrlParts: initialRSCPayload.c,
initialParallelRoutes: new Map(),
location: window.location,
couldBeIntercepted: initialRSCPayload.i,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ describe('createInitialRouterState', () => {
initialFlightData: [
[initialTree, ['', children, {}, null], <title>Test</title>],
],
initialCanonicalUrl,
initialCanonicalUrlParts: initialCanonicalUrl.split('/'),
initialParallelRoutes,
location: new URL('/linking', 'https://localhost') as any,
couldBeIntercepted: false,
Expand All @@ -50,7 +50,7 @@ describe('createInitialRouterState', () => {
initialFlightData: [
[initialTree, ['', children, {}, null], <title>Test</title>],
],
initialCanonicalUrl,
initialCanonicalUrlParts: initialCanonicalUrl.split('/'),
initialParallelRoutes,
location: new URL('/linking', 'https://localhost') as any,
couldBeIntercepted: false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import { addRefreshMarkerToActiveParallelSegments } from './refetch-inactive-par

export interface InitialRouterStateParameters {
buildId: string
initialCanonicalUrl: string
initialCanonicalUrlParts: string[]
initialParallelRoutes: CacheNode['parallelRoutes']
initialFlightData: FlightDataPath[]
location: Location | null
Expand All @@ -21,12 +21,16 @@ export interface InitialRouterStateParameters {
export function createInitialRouterState({
buildId,
initialFlightData,
initialCanonicalUrl,
initialCanonicalUrlParts,
initialParallelRoutes,
location,
couldBeIntercepted,
postponed,
}: InitialRouterStateParameters) {
// When initialized on the server, the canonical URL is provided as an array of parts.
// This is to ensure that when the RSC payload is streamed to the client, crawlers don't
// interpret it as a URL that should be crawled.
const initialCanonicalUrl = initialCanonicalUrlParts.join('/')
// The initialFlightData is an array of FlightDataPath arrays.
// For the root render, there'll only be a top-level FlightDataPath array.
const [initialTree, initialSeedData, initialHead] = initialFlightData[0]
Expand Down
18 changes: 14 additions & 4 deletions packages/next/src/server/app-render/app-render.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,16 @@ async function generateDynamicFlightRenderResult(
})
}

/**
 * Breaks the canonical URL of the current request (pathname followed by the query string)
 * into its `/`-delimited pieces.
 *
 * The value only exists to seed the router state, but crawlers may treat anything
 * path-like in the RSC payload as a URL to crawl. Sending the pieces as an array avoids
 * that; joining the pieces with `/` restores the original string.
 */
function prepareInitialCanonicalUrl(url: RequestStore['url']) {
const { pathname, search } = url
const canonicalUrl = `${pathname}${search}`
return canonicalUrl.split('/')
}

// This is the data necessary to render <AppRouter /> when no SSR errors are encountered
async function getRSCPayload(
tree: LoaderTree,
Expand Down Expand Up @@ -558,7 +568,7 @@ async function getRSCPayload(
P: <Preloads preloadCallbacks={preloadCallbacks} />,
b: ctx.renderOpts.buildId,
p: ctx.assetPrefix,
c: url.pathname + url.search,
c: prepareInitialCanonicalUrl(url),
i: !!couldBeIntercepted,
f: [[initialTree, seedData, initialHead]],
m: missingSlots,
Expand Down Expand Up @@ -635,7 +645,7 @@ async function getErrorRSCPayload(
return {
b: ctx.renderOpts.buildId,
p: ctx.assetPrefix,
c: url.pathname + url.search,
c: prepareInitialCanonicalUrl(url),
m: undefined,
i: false,
f: [[initialTree, initialSeedData, initialHead]],
Expand Down Expand Up @@ -669,7 +679,7 @@ function App<T>({
const initialState = createInitialRouterState({
buildId: response.b,
initialFlightData: response.f,
initialCanonicalUrl: response.c,
initialCanonicalUrlParts: response.c,
// location and initialParallelRoutes are not initialized in the SSR render
// they are set to an empty map and window.location, respectively during hydration
initialParallelRoutes: null!,
Expand Down Expand Up @@ -727,7 +737,7 @@ function AppWithoutContext<T>({
const initialState = createInitialRouterState({
buildId: response.b,
initialFlightData: response.f,
initialCanonicalUrl: response.c,
initialCanonicalUrlParts: response.c,
// location and initialParallelRoutes are not initialized in the SSR render
// they are set to an empty map and window.location, respectively during hydration
initialParallelRoutes: null!,
Expand Down
4 changes: 2 additions & 2 deletions packages/next/src/server/app-render/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ export type InitialRSCPayload = {
b: string
/** assetPrefix */
p: string
/** initialCanonicalUrl */
c: string
/** initialCanonicalUrlParts */
c: string[]
/** couldBeIntercepted */
i: boolean
/** initialFlightData */
Expand Down

0 comments on commit 7f57d4b

Please sign in to comment.