From a2b8a255121d5ee3c6dbc1d4b14c5b8c3091ec9e Mon Sep 17 00:00:00 2001 From: waleed Date: Sat, 11 Apr 2026 09:27:13 -0700 Subject: [PATCH 1/6] fix(trigger): auto-detect header row and rename lastKnownRowCount to lastIndexChecked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace hardcoded !1:1 header fetch with detectHeaderRow(), which scans the first 10 rows and returns the first non-empty row as headers. This fixes row: null / headers: [] when a sheet has blank rows or a title row above the actual column headers (e.g. headers in row 3). - Rename lastKnownRowCount → lastIndexChecked in GoogleSheetsWebhookConfig and all usage sites to clarify that the value is a row index pointer, not a total count. - Remove config parameter from processRows() since it was unused after the includeHeaders flag was removed. --- .../sim/lib/webhooks/polling/google-sheets.ts | 78 ++++++++++++------- 1 file changed, 52 insertions(+), 26 deletions(-) diff --git a/apps/sim/lib/webhooks/polling/google-sheets.ts b/apps/sim/lib/webhooks/polling/google-sheets.ts index 2b79b20697..cd48d81723 100644 --- a/apps/sim/lib/webhooks/polling/google-sheets.ts +++ b/apps/sim/lib/webhooks/polling/google-sheets.ts @@ -10,6 +10,9 @@ import { processPolledWebhookEvent } from '@/lib/webhooks/processor' const MAX_ROWS_PER_POLL = 100 +/** Maximum number of leading rows to scan when auto-detecting the header row. */ +const HEADER_SCAN_ROWS = 10 + type ValueRenderOption = 'FORMATTED_VALUE' | 'UNFORMATTED_VALUE' | 'FORMULA' type DateTimeRenderOption = 'SERIAL_NUMBER' | 'FORMATTED_STRING' @@ -20,7 +23,11 @@ interface GoogleSheetsWebhookConfig { manualSheetName?: string valueRenderOption?: ValueRenderOption dateTimeRenderOption?: DateTimeRenderOption - lastKnownRowCount?: number + /** + * The 1-indexed row number of the last row we have seeded or processed. + * New rows are emitted starting from lastIndexChecked + 1. + */ + lastIndexChecked?: number lastModifiedTime?: string lastCheckedTimestamp?: string maxRowsPerPoll?: number @@ -93,11 +100,11 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { ) // First poll: seed state, emit nothing - if (config.lastKnownRowCount === undefined) { + if (config.lastIndexChecked === undefined) { await updateWebhookProviderConfig( webhookId, { - lastKnownRowCount: currentRowCount, + lastIndexChecked: currentRowCount, lastModifiedTime: currentModifiedTime ?? config.lastModifiedTime, lastCheckedTimestamp: now.toISOString(), }, @@ -105,22 +112,23 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { ) await markWebhookSuccess(webhookId, logger) logger.info( - `[${requestId}] First poll for webhook ${webhookId}, seeded row count: ${currentRowCount}` + `[${requestId}] First poll for webhook ${webhookId}, seeded row index: ${currentRowCount}` ) return 'success' } - // Rows deleted or unchanged - if (currentRowCount <= config.lastKnownRowCount) { - if (currentRowCount < config.lastKnownRowCount) { + // Rows deleted or unchanged — update pointer to current position to avoid + // re-processing if rows are later re-added at a lower index + if (currentRowCount <= config.lastIndexChecked) { + if (currentRowCount < config.lastIndexChecked) { logger.warn( - `[${requestId}] Row count decreased from ${config.lastKnownRowCount} to ${currentRowCount} for webhook ${webhookId}` + `[${requestId}] Row count decreased from ${config.lastIndexChecked} to ${currentRowCount} for webhook ${webhookId}` ) } await updateWebhookProviderConfig( webhookId, { - lastKnownRowCount: currentRowCount, + lastIndexChecked: currentRowCount, lastModifiedTime: currentModifiedTime ?? config.lastModifiedTime, lastCheckedTimestamp: now.toISOString(), }, @@ -132,11 +140,11 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { } // New rows detected - const newRowCount = currentRowCount - config.lastKnownRowCount + const newRowCount = currentRowCount - config.lastIndexChecked const maxRows = config.maxRowsPerPoll || MAX_ROWS_PER_POLL const rowsToFetch = Math.min(newRowCount, maxRows) - const startRow = config.lastKnownRowCount + 1 - const endRow = config.lastKnownRowCount + rowsToFetch + const startRow = config.lastIndexChecked + 1 + const endRow = config.lastIndexChecked + rowsToFetch logger.info( `[${requestId}] Found ${newRowCount} new rows for webhook ${webhookId}, processing rows ${startRow}-${endRow}` @@ -146,7 +154,10 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { const valueRender = config.valueRenderOption || 'FORMATTED_VALUE' const dateTimeRender = config.dateTimeRenderOption || 'SERIAL_NUMBER' - const headers = await fetchHeaderRow( + // Auto-detect the header row by scanning the first HEADER_SCAN_ROWS rows for + // the first non-empty row. This handles sheets where headers are not in row 1 + // (e.g. when there are blank rows or a title row above the column headers). + const { headers } = await detectHeaderRow( accessToken, spreadsheetId, sheetName, @@ -156,8 +167,6 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { logger ) - // Fetch new rows — startRow/endRow are already 1-indexed sheet row numbers - // because lastKnownRowCount includes the header row const newRows = await fetchRowRange( accessToken, spreadsheetId, @@ -176,7 +185,6 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { startRow, spreadsheetId, sheetName, - config, webhookData, workflowData, requestId, @@ -184,12 +192,12 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { ) const rowsAdvanced = failedCount > 0 ? 0 : rowsToFetch - const newLastKnownRowCount = config.lastKnownRowCount + rowsAdvanced + const newLastIndexChecked = config.lastIndexChecked + rowsAdvanced const hasRemainingOrFailed = rowsAdvanced < newRowCount await updateWebhookProviderConfig( webhookId, { - lastKnownRowCount: newLastKnownRowCount, + lastIndexChecked: newLastIndexChecked, lastModifiedTime: hasRemainingOrFailed ? config.lastModifiedTime : (currentModifiedTime ?? config.lastModifiedTime), @@ -298,7 +306,14 @@ async function getDataRowCount( return rows?.length ?? 0 } -async function fetchHeaderRow( +/** + * Scans the first {@link HEADER_SCAN_ROWS} rows of the sheet and returns the + * first non-empty row as headers along with its 1-indexed row number. + * + * This avoids the hardcoded `!1:1` assumption and correctly handles sheets that + * have blank rows or a title row above the actual column headers. + */ +async function detectHeaderRow( accessToken: string, spreadsheetId: string, sheetName: string, @@ -306,14 +321,14 @@ async function fetchHeaderRow( dateTimeRenderOption: DateTimeRenderOption, requestId: string, logger: ReturnType -): Promise { +): Promise<{ headers: string[]; headerRowIndex: number }> { const encodedSheet = encodeURIComponent(sheetName) const params = new URLSearchParams({ fields: 'values', valueRenderOption, dateTimeRenderOption, }) - const url = `https://sheets.googleapis.com/v4/spreadsheets/${spreadsheetId}/values/${encodedSheet}!1:1?${params.toString()}` + const url = `https://sheets.googleapis.com/v4/spreadsheets/${spreadsheetId}/values/${encodedSheet}!1:${HEADER_SCAN_ROWS}?${params.toString()}` const response = await fetch(url, { headers: { Authorization: `Bearer ${accessToken}` }, @@ -324,15 +339,27 @@ async function fetchHeaderRow( if (status === 403 || status === 429) { const errorData = await response.json().catch(() => ({})) throw new Error( - `Sheets API rate limit (${status}) fetching header row — skipping to retry next poll cycle: ${JSON.stringify(errorData)}` + `Sheets API rate limit (${status}) fetching header rows — skipping to retry next poll cycle: ${JSON.stringify(errorData)}` ) } - logger.warn(`[${requestId}] Failed to fetch header row, proceeding without headers`) - return [] + logger.warn(`[${requestId}] Failed to fetch header rows, proceeding without headers`) + return { headers: [], headerRowIndex: 1 } } const data = await response.json() - return (data.values?.[0] as string[]) ?? [] + // The Sheets API includes empty leading rows as [] when a fixed range is requested, + // and omits only trailing empty rows. values[i] therefore corresponds to sheet row i+1. + const rows = (data.values as string[][] | undefined) ?? [] + + for (let i = 0; i < rows.length; i++) { + const row = rows[i] + if (row?.some((cell) => cell !== '')) { + return { headers: row, headerRowIndex: i + 1 } + } + } + + // No non-empty row found within the scan window — proceed without headers + return { headers: [], headerRowIndex: 1 } } async function fetchRowRange( @@ -383,7 +410,6 @@ async function processRows( startRowIndex: number, spreadsheetId: string, sheetName: string, - config: GoogleSheetsWebhookConfig, webhookData: PollWebhookContext['webhookData'], workflowData: PollWebhookContext['workflowData'], requestId: string, From fbf6a1741c6fb5eecc6fe689deb2dd68e27836fc Mon Sep 17 00:00:00 2001 From: waleed Date: Sat, 11 Apr 2026 10:25:43 -0700 Subject: [PATCH 2/6] fix(trigger): combine sheet state fetch, skip header/blank rows from data emission - Replace separate getDataRowCount() + detectHeaderRow() with a single fetchSheetState() call that returns rowCount, headers, and headerRowIndex from one A:Z fetch. Saves one Sheets API round-trip per poll cycle when new rows are detected. - Use headerRowIndex to compute adjustedStartRow, preventing the header row (and any blank rows above it) from being emitted as data events when lastIndexChecked was seeded from an empty sheet. - Handle the edge case where the entire batch falls within the header/blank window by advancing the pointer and returning early without fetching rows. - Skip empty rows (row.length === 0) in processRows rather than firing a workflow run with no meaningful data. --- .../sim/lib/webhooks/polling/google-sheets.ts | 169 +++++++++--------- 1 file changed, 89 insertions(+), 80 deletions(-) diff --git a/apps/sim/lib/webhooks/polling/google-sheets.ts b/apps/sim/lib/webhooks/polling/google-sheets.ts index cd48d81723..0bbf1a2576 100644 --- a/apps/sim/lib/webhooks/polling/google-sheets.ts +++ b/apps/sim/lib/webhooks/polling/google-sheets.ts @@ -90,11 +90,23 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { return 'success' } - // Fetch current row count via column A - const currentRowCount = await getDataRowCount( + // Resolve render options before the sheet fetch so they apply to both + // row counting and header detection in the same API call. + const valueRender = config.valueRenderOption || 'FORMATTED_VALUE' + const dateTimeRender = config.dateTimeRenderOption || 'SERIAL_NUMBER' + + // Single API call: get current row count AND auto-detect the header row. + // Combining these avoids a second round-trip when new rows are present. + const { + rowCount: currentRowCount, + headers, + headerRowIndex, + } = await fetchSheetState( accessToken, spreadsheetId, sheetName, + valueRender, + dateTimeRender, requestId, logger ) @@ -146,32 +158,41 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { const startRow = config.lastIndexChecked + 1 const endRow = config.lastIndexChecked + rowsToFetch + // If the header row (or blank rows above it) falls within the current + // fetch window, skip past them so the header is never emitted as a data + // event. This happens when lastIndexChecked was seeded from an empty sheet + // and the user subsequently added a header row + data rows together. + const adjustedStartRow = + headerRowIndex > 0 ? Math.max(startRow, headerRowIndex + 1) : startRow + logger.info( - `[${requestId}] Found ${newRowCount} new rows for webhook ${webhookId}, processing rows ${startRow}-${endRow}` + `[${requestId}] Found ${newRowCount} new rows for webhook ${webhookId}, processing rows ${adjustedStartRow}-${endRow}` ) - // Resolve render options - const valueRender = config.valueRenderOption || 'FORMATTED_VALUE' - const dateTimeRender = config.dateTimeRenderOption || 'SERIAL_NUMBER' - - // Auto-detect the header row by scanning the first HEADER_SCAN_ROWS rows for - // the first non-empty row. This handles sheets where headers are not in row 1 - // (e.g. when there are blank rows or a title row above the column headers). - const { headers } = await detectHeaderRow( - accessToken, - spreadsheetId, - sheetName, - valueRender, - dateTimeRender, - requestId, - logger - ) + // All rows in this batch are header or blank rows — advance the pointer + // and skip data fetching entirely. + if (adjustedStartRow > endRow) { + await updateWebhookProviderConfig( + webhookId, + { + lastIndexChecked: config.lastIndexChecked + rowsToFetch, + lastModifiedTime: currentModifiedTime ?? config.lastModifiedTime, + lastCheckedTimestamp: now.toISOString(), + }, + logger + ) + await markWebhookSuccess(webhookId, logger) + logger.info( + `[${requestId}] Batch ${startRow}-${endRow} contained only header/blank rows for webhook ${webhookId}, advancing pointer` + ) + return 'success' + } const newRows = await fetchRowRange( accessToken, spreadsheetId, sheetName, - startRow, + adjustedStartRow, endRow, valueRender, dateTimeRender, @@ -182,7 +203,7 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { const { processedCount, failedCount } = await processRows( newRows, headers, - startRow, + adjustedStartRow, spreadsheetId, sheetName, webhookData, @@ -264,56 +285,23 @@ async function getDriveFileModifiedTime( } } -async function getDataRowCount( - accessToken: string, - spreadsheetId: string, - sheetName: string, - requestId: string, - logger: ReturnType -): Promise { - const encodedSheet = encodeURIComponent(sheetName) - // Fetch all rows across columns A–Z with majorDimension=ROWS so the API - // returns one entry per row that has ANY non-empty cell. Rows where column A - // is empty but other columns have data are included, whereas the previous - // column-A-only approach silently missed them. The returned array length - // equals the 1-indexed row number of the last row with data. - const url = `https://sheets.googleapis.com/v4/spreadsheets/${spreadsheetId}/values/${encodedSheet}!A:Z?majorDimension=ROWS&fields=values` - - const response = await fetch(url, { - headers: { Authorization: `Bearer ${accessToken}` }, - }) - - if (!response.ok) { - const status = response.status - const errorData = await response.json().catch(() => ({})) - - if (status === 403 || status === 429) { - throw new Error( - `Sheets API rate limit (${status}) — skipping to retry next poll cycle: ${JSON.stringify(errorData)}` - ) - } - - throw new Error( - `Failed to fetch row count: ${status} ${response.statusText} - ${JSON.stringify(errorData)}` - ) - } - - const data = await response.json() - // values is [[row1col1, row1col2, ...], [row2col1, ...], ...] when majorDimension=ROWS. - // The Sheets API omits trailing empty rows, so the array length is the last - // non-empty row index (1-indexed), which is exactly what we need. - const rows = data.values as string[][] | undefined - return rows?.length ?? 0 -} - /** - * Scans the first {@link HEADER_SCAN_ROWS} rows of the sheet and returns the - * first non-empty row as headers along with its 1-indexed row number. + * Fetches the full sheet in a single API call and returns both the current row + * count and the auto-detected headers. + * + * Row count: the Sheets API omits trailing empty rows, so the length of the + * returned values array equals the 1-indexed number of the last row with data. * - * This avoids the hardcoded `!1:1` assumption and correctly handles sheets that - * have blank rows or a title row above the actual column headers. + * Header detection: scans the first {@link HEADER_SCAN_ROWS} rows and returns + * the first non-empty row as headers. This correctly handles sheets where + * headers are not in row 1 (e.g. blank rows or a title row above the column + * headers). `headerRowIndex` is the 1-indexed row number of that row, or 0 if + * no non-empty row was found within the scan window. + * + * Combining both into one call avoids the extra round-trip that a separate + * header-detection fetch would require on every cycle where new rows exist. */ -async function detectHeaderRow( +async function fetchSheetState( accessToken: string, spreadsheetId: string, sheetName: string, @@ -321,14 +309,21 @@ async function detectHeaderRow( dateTimeRenderOption: DateTimeRenderOption, requestId: string, logger: ReturnType -): Promise<{ headers: string[]; headerRowIndex: number }> { +): Promise<{ rowCount: number; headers: string[]; headerRowIndex: number }> { const encodedSheet = encodeURIComponent(sheetName) + // Fetch all rows across columns A–Z with majorDimension=ROWS so the API + // returns one entry per row that has ANY non-empty cell. Rows where column A + // is empty but other columns have data are included. The array length equals + // the 1-indexed row number of the last row with data (trailing empty rows are + // omitted by the Sheets API). Leading empty rows within the range are included + // as [] so values[i] reliably corresponds to sheet row i+1. const params = new URLSearchParams({ + majorDimension: 'ROWS', fields: 'values', valueRenderOption, dateTimeRenderOption, }) - const url = `https://sheets.googleapis.com/v4/spreadsheets/${spreadsheetId}/values/${encodedSheet}!1:${HEADER_SCAN_ROWS}?${params.toString()}` + const url = `https://sheets.googleapis.com/v4/spreadsheets/${spreadsheetId}/values/${encodedSheet}!A:Z?${params.toString()}` const response = await fetch(url, { headers: { Authorization: `Bearer ${accessToken}` }, @@ -336,30 +331,36 @@ async function detectHeaderRow( if (!response.ok) { const status = response.status + const errorData = await response.json().catch(() => ({})) + if (status === 403 || status === 429) { - const errorData = await response.json().catch(() => ({})) throw new Error( - `Sheets API rate limit (${status}) fetching header rows — skipping to retry next poll cycle: ${JSON.stringify(errorData)}` + `Sheets API rate limit (${status}) — skipping to retry next poll cycle: ${JSON.stringify(errorData)}` ) } - logger.warn(`[${requestId}] Failed to fetch header rows, proceeding without headers`) - return { headers: [], headerRowIndex: 1 } + + throw new Error( + `Failed to fetch sheet state: ${status} ${response.statusText} - ${JSON.stringify(errorData)}` + ) } const data = await response.json() - // The Sheets API includes empty leading rows as [] when a fixed range is requested, - // and omits only trailing empty rows. values[i] therefore corresponds to sheet row i+1. const rows = (data.values as string[][] | undefined) ?? [] + const rowCount = rows.length - for (let i = 0; i < rows.length; i++) { + // Find the first non-empty row within the header scan window + let headers: string[] = [] + let headerRowIndex = 0 + for (let i = 0; i < Math.min(rows.length, HEADER_SCAN_ROWS); i++) { const row = rows[i] if (row?.some((cell) => cell !== '')) { - return { headers: row, headerRowIndex: i + 1 } + headers = row + headerRowIndex = i + 1 + break } } - // No non-empty row found within the scan window — proceed without headers - return { headers: [], headerRowIndex: 1 } + return { rowCount, headers, headerRowIndex } } async function fetchRowRange( @@ -422,6 +423,14 @@ async function processRows( const row = rows[i] const rowNumber = startRowIndex + i // startRowIndex is already the 1-indexed sheet row + // The Sheets API returns [] for empty rows within a fixed range. Skip them + // rather than firing a workflow run with no meaningful data. + if (!row || row.length === 0) { + logger.info(`[${requestId}] Skipping empty row ${rowNumber} for webhook ${webhookData.id}`) + processedCount++ + continue + } + try { await pollingIdempotency.executeWithIdempotency( 'google-sheets', From 00fbe76b894af046a41da51145486323c2e4fcd9 Mon Sep 17 00:00:00 2001 From: waleed Date: Sat, 11 Apr 2026 10:28:18 -0700 Subject: [PATCH 3/6] fix(trigger): preserve lastModifiedTime when remaining rows exist after header skip When all rows in a batch fall within the header/blank window (adjustedStartRow > endRow), the early return was unconditionally updating lastModifiedTime to the current value. If there were additional rows beyond the batch cap, the next Drive pre-check would see an unchanged modifiedTime and skip polling entirely, leaving those rows unprocessed. Mirror the hasRemainingOrFailed pattern from the normal processing path. --- apps/sim/lib/webhooks/polling/google-sheets.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/apps/sim/lib/webhooks/polling/google-sheets.ts b/apps/sim/lib/webhooks/polling/google-sheets.ts index 0bbf1a2576..d9975cd893 100644 --- a/apps/sim/lib/webhooks/polling/google-sheets.ts +++ b/apps/sim/lib/webhooks/polling/google-sheets.ts @@ -172,11 +172,16 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { // All rows in this batch are header or blank rows — advance the pointer // and skip data fetching entirely. if (adjustedStartRow > endRow) { + // Preserve the old modifiedTime if there are remaining rows beyond this + // batch so the Drive pre-check doesn't skip the next poll cycle. + const hasRemainingRows = rowsToFetch < newRowCount await updateWebhookProviderConfig( webhookId, { lastIndexChecked: config.lastIndexChecked + rowsToFetch, - lastModifiedTime: currentModifiedTime ?? config.lastModifiedTime, + lastModifiedTime: hasRemainingRows + ? config.lastModifiedTime + : (currentModifiedTime ?? config.lastModifiedTime), lastCheckedTimestamp: now.toISOString(), }, logger From da1e43a8370352e0f647394a23ad47c5b1d259bf Mon Sep 17 00:00:00 2001 From: waleed Date: Sat, 11 Apr 2026 10:29:46 -0700 Subject: [PATCH 4/6] chore(trigger): remove verbose inline comments from google-sheets poller --- .../sim/lib/webhooks/polling/google-sheets.ts | 60 ++++--------------- 1 file changed, 13 insertions(+), 47 deletions(-) diff --git a/apps/sim/lib/webhooks/polling/google-sheets.ts b/apps/sim/lib/webhooks/polling/google-sheets.ts index d9975cd893..0778bdf179 100644 --- a/apps/sim/lib/webhooks/polling/google-sheets.ts +++ b/apps/sim/lib/webhooks/polling/google-sheets.ts @@ -23,10 +23,7 @@ interface GoogleSheetsWebhookConfig { manualSheetName?: string valueRenderOption?: ValueRenderOption dateTimeRenderOption?: DateTimeRenderOption - /** - * The 1-indexed row number of the last row we have seeded or processed. - * New rows are emitted starting from lastIndexChecked + 1. - */ + /** 1-indexed row number of the last row seeded or processed. */ lastIndexChecked?: number lastModifiedTime?: string lastCheckedTimestamp?: string @@ -70,7 +67,6 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { return 'failure' } - // Pre-check: use Drive API to see if the file was modified since last poll const { unchanged: skipPoll, currentModifiedTime } = await isDriveFileUnchanged( accessToken, spreadsheetId, @@ -90,13 +86,9 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { return 'success' } - // Resolve render options before the sheet fetch so they apply to both - // row counting and header detection in the same API call. const valueRender = config.valueRenderOption || 'FORMATTED_VALUE' const dateTimeRender = config.dateTimeRenderOption || 'SERIAL_NUMBER' - // Single API call: get current row count AND auto-detect the header row. - // Combining these avoids a second round-trip when new rows are present. const { rowCount: currentRowCount, headers, @@ -129,8 +121,6 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { return 'success' } - // Rows deleted or unchanged — update pointer to current position to avoid - // re-processing if rows are later re-added at a lower index if (currentRowCount <= config.lastIndexChecked) { if (currentRowCount < config.lastIndexChecked) { logger.warn( @@ -151,17 +141,14 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { return 'success' } - // New rows detected const newRowCount = currentRowCount - config.lastIndexChecked const maxRows = config.maxRowsPerPoll || MAX_ROWS_PER_POLL const rowsToFetch = Math.min(newRowCount, maxRows) const startRow = config.lastIndexChecked + 1 const endRow = config.lastIndexChecked + rowsToFetch - // If the header row (or blank rows above it) falls within the current - // fetch window, skip past them so the header is never emitted as a data - // event. This happens when lastIndexChecked was seeded from an empty sheet - // and the user subsequently added a header row + data rows together. + // Skip past the header row (and any blank rows above it) so it is never + // emitted as a data event. const adjustedStartRow = headerRowIndex > 0 ? Math.max(startRow, headerRowIndex + 1) : startRow @@ -169,11 +156,8 @@ export const googleSheetsPollingHandler: PollingProviderHandler = { `[${requestId}] Found ${newRowCount} new rows for webhook ${webhookId}, processing rows ${adjustedStartRow}-${endRow}` ) - // All rows in this batch are header or blank rows — advance the pointer - // and skip data fetching entirely. + // Entire batch is header/blank rows — advance pointer and skip fetch. if (adjustedStartRow > endRow) { - // Preserve the old modifiedTime if there are remaining rows beyond this - // batch so the Drive pre-check doesn't skip the next poll cycle. const hasRemainingRows = rowsToFetch < newRowCount await updateWebhookProviderConfig( webhookId, @@ -291,20 +275,14 @@ async function getDriveFileModifiedTime( } /** - * Fetches the full sheet in a single API call and returns both the current row - * count and the auto-detected headers. + * Fetches the full sheet (A:Z) and returns the row count, auto-detected + * headers, and the 1-indexed header row number in a single API call. * - * Row count: the Sheets API omits trailing empty rows, so the length of the - * returned values array equals the 1-indexed number of the last row with data. - * - * Header detection: scans the first {@link HEADER_SCAN_ROWS} rows and returns - * the first non-empty row as headers. This correctly handles sheets where - * headers are not in row 1 (e.g. blank rows or a title row above the column - * headers). `headerRowIndex` is the 1-indexed row number of that row, or 0 if - * no non-empty row was found within the scan window. - * - * Combining both into one call avoids the extra round-trip that a separate - * header-detection fetch would require on every cycle where new rows exist. + * The Sheets API omits trailing empty rows, so `rows.length` equals the last + * non-empty row number. Leading empty rows within the range are returned as [] + * so `rows[i]` reliably maps to sheet row `i + 1`. Header detection scans the + * first {@link HEADER_SCAN_ROWS} rows for the first non-empty row. Returns + * `headerRowIndex = 0` when no header is found within the scan window. */ async function fetchSheetState( accessToken: string, @@ -316,12 +294,6 @@ async function fetchSheetState( logger: ReturnType ): Promise<{ rowCount: number; headers: string[]; headerRowIndex: number }> { const encodedSheet = encodeURIComponent(sheetName) - // Fetch all rows across columns A–Z with majorDimension=ROWS so the API - // returns one entry per row that has ANY non-empty cell. Rows where column A - // is empty but other columns have data are included. The array length equals - // the 1-indexed row number of the last row with data (trailing empty rows are - // omitted by the Sheets API). Leading empty rows within the range are included - // as [] so values[i] reliably corresponds to sheet row i+1. const params = new URLSearchParams({ majorDimension: 'ROWS', fields: 'values', @@ -337,13 +309,11 @@ async function fetchSheetState( if (!response.ok) { const status = response.status const errorData = await response.json().catch(() => ({})) - if (status === 403 || status === 429) { throw new Error( `Sheets API rate limit (${status}) — skipping to retry next poll cycle: ${JSON.stringify(errorData)}` ) } - throw new Error( `Failed to fetch sheet state: ${status} ${response.statusText} - ${JSON.stringify(errorData)}` ) @@ -353,7 +323,6 @@ async function fetchSheetState( const rows = (data.values as string[][] | undefined) ?? [] const rowCount = rows.length - // Find the first non-empty row within the header scan window let headers: string[] = [] let headerRowIndex = 0 for (let i = 0; i < Math.min(rows.length, HEADER_SCAN_ROWS); i++) { @@ -394,13 +363,11 @@ async function fetchRowRange( if (!response.ok) { const status = response.status const errorData = await response.json().catch(() => ({})) - if (status === 403 || status === 429) { throw new Error( `Sheets API rate limit (${status}) — skipping to retry next poll cycle: ${JSON.stringify(errorData)}` ) } - throw new Error( `Failed to fetch rows ${startRow}-${endRow}: ${status} ${response.statusText} - ${JSON.stringify(errorData)}` ) @@ -426,10 +393,9 @@ async function processRows( for (let i = 0; i < rows.length; i++) { const row = rows[i] - const rowNumber = startRowIndex + i // startRowIndex is already the 1-indexed sheet row + const rowNumber = startRowIndex + i - // The Sheets API returns [] for empty rows within a fixed range. Skip them - // rather than firing a workflow run with no meaningful data. + // Skip empty rows — don't fire a workflow run with no data. if (!row || row.length === 0) { logger.info(`[${requestId}] Skipping empty row ${rowNumber} for webhook ${webhookData.id}`) processedCount++ From 3ebdcff8a803b8937f735e3cff12cbbbd9048ab6 Mon Sep 17 00:00:00 2001 From: waleed Date: Sat, 11 Apr 2026 11:19:07 -0700 Subject: [PATCH 5/6] fix(trigger): revert to full-width A:Z fetch for correct row count and consistent column scope --- apps/sim/lib/webhooks/polling/google-sheets.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/apps/sim/lib/webhooks/polling/google-sheets.ts b/apps/sim/lib/webhooks/polling/google-sheets.ts index 0778bdf179..8d942f6b19 100644 --- a/apps/sim/lib/webhooks/polling/google-sheets.ts +++ b/apps/sim/lib/webhooks/polling/google-sheets.ts @@ -275,13 +275,12 @@ async function getDriveFileModifiedTime( } /** - * Fetches the full sheet (A:Z) and returns the row count, auto-detected - * headers, and the 1-indexed header row number in a single API call. + * Fetches the sheet (A:Z) and returns the row count, auto-detected headers, + * and the 1-indexed header row number in a single API call. * * The Sheets API omits trailing empty rows, so `rows.length` equals the last - * non-empty row number. Leading empty rows within the range are returned as [] - * so `rows[i]` reliably maps to sheet row `i + 1`. Header detection scans the - * first {@link HEADER_SCAN_ROWS} rows for the first non-empty row. Returns + * non-empty row in columns A–Z. Header detection scans the first + * {@link HEADER_SCAN_ROWS} rows for the first non-empty row. Returns * `headerRowIndex = 0` when no header is found within the scan window. */ async function fetchSheetState( From 9b3af87dc6d2ed4f8232909c449b8383251eceeb Mon Sep 17 00:00:00 2001 From: waleed Date: Sat, 11 Apr 2026 11:30:44 -0700 Subject: [PATCH 6/6] fix(trigger): don't count skipped empty rows as processed --- apps/sim/lib/webhooks/polling/google-sheets.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/sim/lib/webhooks/polling/google-sheets.ts b/apps/sim/lib/webhooks/polling/google-sheets.ts index 8d942f6b19..1aaf9bd25f 100644 --- a/apps/sim/lib/webhooks/polling/google-sheets.ts +++ b/apps/sim/lib/webhooks/polling/google-sheets.ts @@ -397,7 +397,6 @@ async function processRows( // Skip empty rows — don't fire a workflow run with no data. if (!row || row.length === 0) { logger.info(`[${requestId}] Skipping empty row ${rowNumber} for webhook ${webhookData.id}`) - processedCount++ continue }