sessionStoragePortable.ts
utils/sessionStoragePortable.ts
794
Lines
25448
Bytes
18
Exports
6
Imports
10
Keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is one piece of the larger system. Its name, directory, imports, and exports show where it fits. Start by reading the exports and related files first.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with session-engine. It contains 794 lines, 6 detected imports, and 18 detected exports.
Important relationships
Detected exports
LITE_READ_BUF_SIZE, validateUuid, unescapeJsonString, extractJsonStringField, extractLastJsonStringField, extractFirstPromptFromHead, readHeadAndTail, LiteSessionFile, readSessionLite, MAX_SANITIZED_LENGTH, sanitizePath, getProjectsDir, getProjectDir, canonicalizePath, findProjectDir, resolveSessionFilePath, SKIP_PRECOMPACT_THRESHOLD, readTranscriptForLoad
Keywords
buffer, head, carry, len, length, text, tail, continue, chunk, file, path, size
Detected imports
crypto, fs/promises, path, ./envUtils.js, ./getWorktreePathsPortable.js, ./hash.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
/**
* Portable session storage utilities.
*
* Pure Node.js — no internal dependencies on logging, experiments, or feature
* flags. Shared between the CLI (src/utils/sessionStorage.ts) and the VS Code
* extension (packages/claude-vscode/src/common-host/sessionStorage.ts).
*/
import type { UUID } from 'crypto'
import { open as fsOpen, readdir, realpath, stat } from 'fs/promises'
import { join } from 'path'
import { getClaudeConfigHomeDir } from './envUtils.js'
import { getWorktreePathsPortable } from './getWorktreePathsPortable.js'
import { djb2Hash } from './hash.js'
/** Size (bytes) of the head/tail buffer for lite metadata reads — 64 KiB. */
export const LITE_READ_BUF_SIZE = 65536
// ---------------------------------------------------------------------------
// UUID validation
// ---------------------------------------------------------------------------
// RFC 4122 textual form: 8-4-4-4-12 hex digits, case-insensitive.
const uuidRegex =
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
/**
 * Narrows an unknown value to a UUID string.
 * @returns the input typed as UUID when it is a well-formed UUID string,
 *          otherwise null.
 */
export function validateUuid(maybeUuid: unknown): UUID | null {
  if (typeof maybeUuid !== 'string' || !uuidRegex.test(maybeUuid)) {
    return null
  }
  return maybeUuid as UUID
}
// ---------------------------------------------------------------------------
// JSON string field extraction — no full parse, works on truncated lines
// ---------------------------------------------------------------------------
/**
 * Unescape a JSON string value extracted as raw text.
 * Only allocates a new string when escape sequences are present; if the raw
 * text is not a valid JSON string body, it is returned unchanged rather than
 * throwing.
 */
export function unescapeJsonString(raw: string): string {
  if (!raw.includes('\\')) return raw
  try {
    return JSON.parse(`"${raw}"`)
  } catch {
    return raw
  }
}
/**
 * Extracts a simple JSON string field value from raw text without full parsing.
 * Looks for `"key":"value"` or `"key": "value"` patterns (exactly zero or one
 * space after the colon).
 * Returns the first match in pattern order, or undefined if not found.
 */
export function extractJsonStringField(
  text: string,
  key: string,
): string | undefined {
  const patterns = [`"${key}":"`, `"${key}": "`]
  for (const pattern of patterns) {
    const idx = text.indexOf(pattern)
    if (idx < 0) continue
    const valueStart = idx + pattern.length
    let i = valueStart
    while (i < text.length) {
      if (text[i] === '\\') {
        // Skip the backslash and the escaped character in one step.
        i += 2
        continue
      }
      if (text[i] === '"') {
        return unescapeJsonString(text.slice(valueStart, i))
      }
      i++
    }
  }
  return undefined
}
/**
 * Like extractJsonStringField but finds the LAST occurrence in the text.
 * Useful for fields that are appended over time (customTitle, tag, etc.).
 *
 * Occurrences from both spacing variants are compared by position in the
 * text, so the match that appears latest wins regardless of which pattern
 * produced it.
 */
export function extractLastJsonStringField(
  text: string,
  key: string,
): string | undefined {
  const patterns = [`"${key}":"`, `"${key}": "`]
  let lastValue: string | undefined
  // Position of the occurrence currently held in lastValue. Without this,
  // the second pattern's (earlier-in-text) matches could clobber a later
  // match already found by the first pattern.
  let lastIdx = -1
  for (const pattern of patterns) {
    let searchFrom = 0
    while (true) {
      const idx = text.indexOf(pattern, searchFrom)
      if (idx < 0) break
      const valueStart = idx + pattern.length
      let i = valueStart
      while (i < text.length) {
        if (text[i] === '\\') {
          i += 2
          continue
        }
        if (text[i] === '"') {
          if (idx > lastIdx) {
            lastIdx = idx
            lastValue = unescapeJsonString(text.slice(valueStart, i))
          }
          break
        }
        i++
      }
      searchFrom = i + 1
    }
  }
  return lastValue
}
// ---------------------------------------------------------------------------
// First prompt extraction from head chunk
// ---------------------------------------------------------------------------
/**
 * Matches auto-generated or system content that should never be surfaced as
 * the first user prompt: anything opening with a lowercase XML-like tag (IDE
 * context, hook output, task notifications, channel messages, etc.) or a
 * synthetic interrupt marker.
 */
const SKIP_FIRST_PROMPT_PATTERN =
  /^(?:\s*<[a-z][\w-]*[\s>]|\[Request interrupted by user[^\]]*\])/
const COMMAND_NAME_RE = /<command-name>(.*?)<\/command-name>/
const BASH_INPUT_RE = /<bash-input>([\s\S]*?)<\/bash-input>/
/**
 * Extracts the first meaningful user prompt from a JSONL head chunk.
 *
 * Skips tool_result messages, isMeta, isCompactSummary, command-name
 * messages, and auto-generated patterns (session hooks, IDE metadata, …).
 * Slash-command names are remembered as a fallback if nothing better is
 * found. Results longer than 200 chars are truncated with an ellipsis.
 */
export function extractFirstPromptFromHead(head: string): string {
  let commandFallback = ''
  for (const line of head.split('\n')) {
    // Cheap substring pre-filters before paying for JSON.parse.
    if (!line.includes('"type":"user"') && !line.includes('"type": "user"')) {
      continue
    }
    if (
      line.includes('"tool_result"') ||
      line.includes('"isMeta":true') ||
      line.includes('"isMeta": true') ||
      line.includes('"isCompactSummary":true') ||
      line.includes('"isCompactSummary": true')
    ) {
      continue
    }
    try {
      const entry = JSON.parse(line) as Record<string, unknown>
      if (entry.type !== 'user') continue
      const message = entry.message as Record<string, unknown> | undefined
      if (!message) continue
      const content = message.content
      // Collect candidate prompt strings from string or block-array content.
      const candidates: string[] = []
      if (typeof content === 'string') {
        candidates.push(content)
      } else if (Array.isArray(content)) {
        for (const block of content as Record<string, unknown>[]) {
          if (block.type === 'text' && typeof block.text === 'string') {
            candidates.push(block.text as string)
          }
        }
      }
      for (const candidate of candidates) {
        const flattened = candidate.replace(/\n/g, ' ').trim()
        if (!flattened) continue
        // Slash-command messages are skipped but remembered as a fallback.
        const cmd = COMMAND_NAME_RE.exec(flattened)
        if (cmd) {
          if (commandFallback === '') commandFallback = cmd[1]!
          continue
        }
        // Format bash input with ! prefix before the generic XML skip.
        const bash = BASH_INPUT_RE.exec(flattened)
        if (bash) return `! ${bash[1]!.trim()}`
        if (SKIP_FIRST_PROMPT_PATTERN.test(flattened)) continue
        return flattened.length > 200
          ? flattened.slice(0, 200).trim() + '\u2026'
          : flattened
      }
    } catch {
      continue
    }
  }
  return commandFallback
}
// ---------------------------------------------------------------------------
// File I/O — read head and tail of a file
// ---------------------------------------------------------------------------
/**
 * Reads the first and last LITE_READ_BUF_SIZE bytes of a file.
 *
 * When the head read already covers the whole file, `tail === head` (same
 * string instance). A caller-supplied Buffer is reused to avoid per-file
 * allocation overhead. Any error — open, read, or close — yields
 * `{ head: '', tail: '' }`.
 */
export async function readHeadAndTail(
  filePath: string,
  fileSize: number,
  buf: Buffer,
): Promise<{ head: string; tail: string }> {
  try {
    const fh = await fsOpen(filePath, 'r')
    try {
      const { bytesRead: headBytes } = await fh.read(buf, 0, LITE_READ_BUF_SIZE, 0)
      if (headBytes === 0) return { head: '', tail: '' }
      const head = buf.toString('utf8', 0, headBytes)
      const tailOffset = Math.max(0, fileSize - LITE_READ_BUF_SIZE)
      if (tailOffset === 0) {
        // Head already covers the whole file.
        return { head, tail: head }
      }
      const { bytesRead: tailBytes } = await fh.read(
        buf,
        0,
        LITE_READ_BUF_SIZE,
        tailOffset,
      )
      return { head, tail: buf.toString('utf8', 0, tailBytes) }
    } finally {
      await fh.close()
    }
  } catch {
    return { head: '', tail: '' }
  }
}
/** Lightweight session-file snapshot: stat metadata plus head/tail text. */
export type LiteSessionFile = {
  mtime: number // last-modified time in epoch milliseconds (Date.getTime())
  size: number // file size in bytes
  head: string // first LITE_READ_BUF_SIZE bytes decoded as UTF-8
  tail: string // last LITE_READ_BUF_SIZE bytes (same string as head for small files)
}
/**
 * Opens a single session file, stats it, and reads head + tail through one
 * file descriptor. Allocates its own buffer, so it is safe to run many of
 * these concurrently (e.g. under Promise.all).
 * @returns stat metadata plus head/tail text, or null on any error.
 */
export async function readSessionLite(
  filePath: string,
): Promise<LiteSessionFile | null> {
  try {
    const fh = await fsOpen(filePath, 'r')
    try {
      // Named `info` rather than `stat` to avoid shadowing the module import.
      const info = await fh.stat()
      const scratch = Buffer.allocUnsafe(LITE_READ_BUF_SIZE)
      const headRead = await fh.read(scratch, 0, LITE_READ_BUF_SIZE, 0)
      if (headRead.bytesRead === 0) return null
      const head = scratch.toString('utf8', 0, headRead.bytesRead)
      const tailOffset = Math.max(0, info.size - LITE_READ_BUF_SIZE)
      let tail = head
      if (tailOffset > 0) {
        const tailRead = await fh.read(scratch, 0, LITE_READ_BUF_SIZE, tailOffset)
        tail = scratch.toString('utf8', 0, tailRead.bytesRead)
      }
      return { mtime: info.mtime.getTime(), size: info.size, head, tail }
    } finally {
      await fh.close()
    }
  } catch {
    return null
  }
}
// ---------------------------------------------------------------------------
// Path sanitization
// ---------------------------------------------------------------------------
/**
 * Maximum length for a single filesystem path component (directory or file name).
 * Most filesystems (ext4, APFS, NTFS) limit individual components to 255 bytes.
 * 200 leaves room for the hash suffix and separator.
 */
export const MAX_SANITIZED_LENGTH = 200
/** Base-36 rendering of the non-negative djb2 hash — Node fallback for Bun.hash. */
function simpleHash(str: string): string {
  return Math.abs(djb2Hash(str)).toString(36)
}
/**
 * Makes a string safe for use as a directory or file name by replacing every
 * non-alphanumeric character with a hyphen. This keeps names portable across
 * all platforms, including Windows where characters like colons are reserved.
 *
 * Names that would exceed filesystem component limits are truncated to
 * MAX_SANITIZED_LENGTH and suffixed with a hash for uniqueness.
 *
 * @param name - The string to make safe (e.g., '/Users/foo/my-project' or 'plugin:name:server')
 * @returns A safe name (e.g., '-Users-foo-my-project' or 'plugin-name-server')
 */
export function sanitizePath(name: string): string {
  const sanitized = name.replace(/[^a-zA-Z0-9]/g, '-')
  if (sanitized.length > MAX_SANITIZED_LENGTH) {
    const suffix =
      typeof Bun !== 'undefined' ? Bun.hash(name).toString(36) : simpleHash(name)
    return `${sanitized.slice(0, MAX_SANITIZED_LENGTH)}-${suffix}`
  }
  return sanitized
}
// ---------------------------------------------------------------------------
// Project directory discovery (shared by listSessions & getSessionMessages)
// ---------------------------------------------------------------------------
/** Root directory that holds all per-project session folders. */
export function getProjectsDir(): string {
  const configHome = getClaudeConfigHomeDir()
  return join(configHome, 'projects')
}
/** Session folder for one project directory (name sanitized for the fs). */
export function getProjectDir(projectDir: string): string {
  const safeName = sanitizePath(projectDir)
  return join(getProjectsDir(), safeName)
}
/**
 * Resolves a directory path to its canonical form: realpath followed by NFC
 * normalization. When realpath fails (e.g. the directory doesn't exist yet),
 * falls back to NFC-only. Ensures symlinked paths (e.g. /tmp → /private/tmp
 * on macOS) resolve to the same project directory.
 */
export async function canonicalizePath(dir: string): Promise<string> {
  let resolved: string
  try {
    resolved = await realpath(dir)
  } catch {
    resolved = dir
  }
  return resolved.normalize('NFC')
}
/**
* Finds the project directory for a given path, tolerating hash mismatches
* for long paths (>200 chars). The CLI uses Bun.hash while the SDK under
* Node.js uses simpleHash — for paths that exceed MAX_SANITIZED_LENGTH,
* these produce different directory suffixes. This function falls back to
* prefix-based scanning when the exact match doesn't exist.
*/
export async function findProjectDir(
projectPath: string,
): Promise<string | undefined> {
const exact = getProjectDir(projectPath)
try {
await readdir(exact)
return exact
} catch {
// Exact match failed — for short paths this means no sessions exist.
// For long paths, try prefix matching to handle hash mismatches.
const sanitized = sanitizePath(projectPath)
if (sanitized.length <= MAX_SANITIZED_LENGTH) {
return undefined
}
const prefix = sanitized.slice(0, MAX_SANITIZED_LENGTH)
const projectsDir = getProjectsDir()
try {
const dirents = await readdir(projectsDir, { withFileTypes: true })
const match = dirents.find(
d => d.isDirectory() && d.name.startsWith(prefix + '-'),
)
return match ? join(projectsDir, match.name) : undefined
} catch {
return undefined
}
}
}
/**
 * Stats `filePath` and returns its size when it exists and is non-empty.
 * Any error (ENOENT/EACCES/ENOTDIR) or a zero-byte file yields undefined so
 * callers keep searching past truncated copies.
 */
async function statNonEmpty(filePath: string): Promise<number | undefined> {
  try {
    const s = await stat(filePath)
    return s.size > 0 ? s.size : undefined
  } catch {
    return undefined
  }
}
/**
 * Resolve a sessionId to its on-disk JSONL file path.
 *
 * When `dir` is provided: canonicalize it, look in that project's directory
 * (with findProjectDir fallback for Bun/Node hash mismatches), then fall back
 * to sibling git worktrees. `projectPath` in the result is the canonical
 * user-facing directory the file was found under.
 *
 * When `dir` is omitted: scan all project directories under ~/.claude/projects/.
 * `projectPath` is undefined in this case (no meaningful project path to report).
 *
 * Existence is checked by stat (operate-then-catch-ENOENT, no existsSync).
 * Zero-byte files are treated as not-found so callers continue searching past
 * a truncated copy to find a valid one in a sibling directory.
 *
 * `fileSize` is returned so callers (loadSessionBuffer) don't need to re-stat.
 *
 * Shared by getSessionInfoImpl and getSessionMessagesImpl — the caller
 * invokes its own reader (readSessionLite / loadSessionBuffer) on the
 * resolved path.
 */
export async function resolveSessionFilePath(
  sessionId: string,
  dir?: string,
): Promise<
  | { filePath: string; projectPath: string | undefined; fileSize: number }
  | undefined
> {
  const fileName = `${sessionId}.jsonl`
  if (dir) {
    const canonical = await canonicalizePath(dir)
    const projectDir = await findProjectDir(canonical)
    if (projectDir) {
      const filePath = join(projectDir, fileName)
      const fileSize = await statNonEmpty(filePath)
      if (fileSize !== undefined) {
        return { filePath, projectPath: canonical, fileSize }
      }
    }
    // Worktree fallback — sessions may live under a different worktree root.
    let worktreePaths: string[]
    try {
      worktreePaths = await getWorktreePathsPortable(canonical)
    } catch {
      worktreePaths = []
    }
    for (const wt of worktreePaths) {
      if (wt === canonical) continue
      const wtProjectDir = await findProjectDir(wt)
      if (!wtProjectDir) continue
      const filePath = join(wtProjectDir, fileName)
      const fileSize = await statNonEmpty(filePath)
      if (fileSize !== undefined) {
        return { filePath, projectPath: wt, fileSize }
      }
    }
    return undefined
  }
  // No dir — scan all project directories.
  const projectsDir = getProjectsDir()
  let dirents: string[]
  try {
    dirents = await readdir(projectsDir)
  } catch {
    return undefined
  }
  for (const name of dirents) {
    const filePath = join(projectsDir, name, fileName)
    const fileSize = await statNonEmpty(filePath)
    if (fileSize !== undefined) {
      return { filePath, projectPath: undefined, fileSize }
    }
  }
  return undefined
}
// ---------------------------------------------------------------------------
// Compact-boundary chunked read (shared by loadTranscriptFile & SDK getSessionMessages)
// ---------------------------------------------------------------------------
/** Chunk size (bytes) for the forward transcript reader. 1 MB balances I/O calls vs buffer growth. */
const TRANSCRIPT_READ_CHUNK_SIZE = 1024 * 1024
/**
 * File size (bytes) below which precompact filtering is skipped.
 * Large sessions (>5 MB) almost always have compact boundaries — they got big
 * because of many turns triggering auto-compact.
 */
export const SKIP_PRECOMPACT_THRESHOLD = 5 * 1024 * 1024
/** Marker bytes searched for when locating the boundary. Lazy: allocated on
 * first use, not at module load. Most sessions never resume. */
let _compactBoundaryMarker: Buffer | undefined
/** Returns (and caches on first call) the compact-boundary byte pattern. */
function compactBoundaryMarker(): Buffer {
  if (_compactBoundaryMarker === undefined) {
    _compactBoundaryMarker = Buffer.from('"compact_boundary"')
  }
  return _compactBoundaryMarker
}
/**
 * Confirm a byte-matched line is a real compact_boundary record (the marker
 * bytes can also appear inside user content) and report whether it carries a
 * preservedSegment.
 * @returns null for invalid JSON or any non-boundary record.
 */
function parseBoundaryLine(
  line: string,
): { hasPreservedSegment: boolean } | null {
  try {
    const parsed = JSON.parse(line) as {
      type?: string
      subtype?: string
      compactMetadata?: { preservedSegment?: unknown }
    }
    const isBoundary =
      parsed.type === 'system' && parsed.subtype === 'compact_boundary'
    if (!isBoundary) return null
    return {
      hasPreservedSegment: Boolean(parsed.compactMetadata?.preservedSegment),
    }
  } catch {
    return null
  }
}
/**
* Single forward chunked read for the --resume load path. Attr-snap lines
* are skipped at the fd level; compact boundaries truncate in-stream. Peak
* is the output size, not the file size.
*
* The surviving (last) attr-snap is appended at EOF instead of in-place;
* restoreAttributionStateFromSnapshots only reads [length-1] so position
* doesn't matter.
*/
/** Growable output accumulator: `buf` holds `len` valid bytes, bounded by `cap`. */
type Sink = { buf: Buffer; len: number; cap: number }
/**
 * Appends src[start..end) to the sink. The backing buffer doubles as needed,
 * clamped to `cap` (the known upper bound on total output). A non-positive
 * range is a no-op.
 */
function sinkWrite(s: Sink, src: Buffer, start: number, end: number): void {
  const count = end - start
  if (count <= 0) return
  const needed = s.len + count
  if (needed > s.buf.length) {
    // Grow geometrically but never past cap.
    const newSize = Math.min(Math.max(s.buf.length * 2, needed), s.cap)
    const grown = Buffer.allocUnsafe(newSize)
    s.buf.copy(grown, 0, 0, s.len)
    s.buf = grown
  }
  src.copy(s.buf, s.len, start, end)
  s.len = needed
}
/** True when the window src[at..end) begins with the exact bytes of `prefix`. */
function hasPrefix(
  src: Buffer,
  prefix: Buffer,
  at: number,
  end: number,
): boolean {
  if (end - at < prefix.length) return false
  return src.compare(prefix, 0, prefix.length, at, at + prefix.length) === 0
}
// First bytes of a serialized attribution-snapshot line (stripped during load).
const ATTR_SNAP_PREFIX = Buffer.from('{"type":"attribution-snapshot"')
// First bytes of a system record — candidate compact_boundary lines start this way.
const SYSTEM_PREFIX = Buffer.from('{"type":"system"')
// ASCII line feed, as a byte and as a one-byte buffer for sinkWrite.
const LF = 0x0a
const LF_BYTE = Buffer.from([LF])
const BOUNDARY_SEARCH_BOUND = 256 // marker sits ~28 bytes in; 256 is slack
/** Mutable scan state threaded through the chunked transcript read. */
type LoadState = {
  out: Sink // filtered output accumulator
  boundaryStartOffset: number // file offset of the last boundary that truncated output
  hasPreservedSegment: boolean // last effective boundary carried a preservedSegment
  lastSnapSrc: Buffer | null // most-recent attr-snap, appended at EOF
  lastSnapLen: number // valid byte count within lastSnapSrc
  lastSnapBuf: Buffer | undefined // reusable backing storage for lastSnapSrc
  bufFileOff: number // file offset of buf[0]
  carryLen: number // bytes of the unterminated trailing line carried across chunks
  carryBuf: Buffer | undefined // reusable backing storage for the carry
  straddleSnapCarryLen: number // per-chunk; reset by processStraddle
  straddleSnapTailEnd: number // end (exclusive) in chunk of the straddling line's tail
}
// Handles the line spanning the chunk seam (carried bytes + start of the new
// chunk). Returns the number of chunk bytes consumed; 0 = nothing consumed,
// caller falls through to the concat path.
function processStraddle(
  s: LoadState,
  chunk: Buffer,
  bytesRead: number,
): number {
  // Reset per-chunk straddle bookkeeping first.
  s.straddleSnapCarryLen = 0
  s.straddleSnapTailEnd = 0
  if (s.carryLen === 0) return 0
  const cb = s.carryBuf!
  const firstNl = chunk.indexOf(LF)
  // No newline within this chunk's read — the straddling line is still open.
  if (firstNl === -1 || firstNl >= bytesRead) return 0
  const tailEnd = firstNl + 1
  if (hasPrefix(cb, ATTR_SNAP_PREFIX, 0, s.carryLen)) {
    // Straddling attr-snap: record its two halves for captureSnap and keep
    // it out of the output stream.
    s.straddleSnapCarryLen = s.carryLen
    s.straddleSnapTailEnd = tailEnd
    s.lastSnapSrc = null
  } else if (s.carryLen < ATTR_SNAP_PREFIX.length) {
    return 0 // too short to rule out attr-snap
  } else {
    if (hasPrefix(cb, SYSTEM_PREFIX, 0, s.carryLen)) {
      // Candidate compact boundary split across the seam — reassemble the
      // complete line as text to confirm.
      const hit = parseBoundaryLine(
        cb.toString('utf-8', 0, s.carryLen) +
          chunk.toString('utf-8', 0, firstNl),
      )
      if (hit?.hasPreservedSegment) {
        s.hasPreservedSegment = true
      } else if (hit) {
        // Confirmed boundary without preserved segment: drop prior output.
        s.out.len = 0
        s.boundaryStartOffset = s.bufFileOff
        s.hasPreservedSegment = false
        s.lastSnapSrc = null
      }
    }
    sinkWrite(s.out, cb, 0, s.carryLen)
    sinkWrite(s.out, chunk, 0, tailEnd)
  }
  // Advance the file offset past the carried bytes and the consumed tail.
  s.bufFileOff += s.carryLen + tailEnd
  s.carryLen = 0
  return tailEnd
}
// Scans complete lines in buf: strips attr-snap lines and truncates output on
// confirmed compact boundaries. Kept lines are written to the sink as
// contiguous runs rather than line-by-line.
function scanChunkLines(
  s: LoadState,
  buf: Buffer,
  boundaryMarker: Buffer,
): { lastSnapStart: number; lastSnapEnd: number; trailStart: number } {
  let boundaryAt = buf.indexOf(boundaryMarker)
  let runStart = 0 // start of the current run of kept bytes
  let lineStart = 0
  let lastSnapStart = -1 // byte range of the last attr-snap line seen in buf
  let lastSnapEnd = -1
  let nl = buf.indexOf(LF)
  while (nl !== -1) {
    const lineEnd = nl + 1
    // Re-seek the cached marker hit if it fell behind the current line.
    if (boundaryAt !== -1 && boundaryAt < lineStart) {
      boundaryAt = buf.indexOf(boundaryMarker, lineStart)
    }
    if (hasPrefix(buf, ATTR_SNAP_PREFIX, lineStart, lineEnd)) {
      // Flush the kept run up to this attr-snap line, then skip the line.
      sinkWrite(s.out, buf, runStart, lineStart)
      lastSnapStart = lineStart
      lastSnapEnd = lineEnd
      runStart = lineEnd
    } else if (
      boundaryAt >= lineStart &&
      boundaryAt < Math.min(lineStart + BOUNDARY_SEARCH_BOUND, lineEnd)
    ) {
      // Marker near the start of this line — confirm with a real JSON parse.
      const hit = parseBoundaryLine(buf.toString('utf-8', lineStart, nl))
      if (hit?.hasPreservedSegment) {
        s.hasPreservedSegment = true // don't truncate; preserved msgs already in output
      } else if (hit) {
        // Confirmed boundary: restart output from this line onward.
        s.out.len = 0
        s.boundaryStartOffset = s.bufFileOff + lineStart
        s.hasPreservedSegment = false
        s.lastSnapSrc = null
        lastSnapStart = -1
        s.straddleSnapCarryLen = 0
        runStart = lineStart
      }
      boundaryAt = buf.indexOf(
        boundaryMarker,
        boundaryAt + boundaryMarker.length,
      )
    }
    lineStart = lineEnd
    nl = buf.indexOf(LF, lineStart)
  }
  // Flush the final kept run; bytes at/after lineStart are the trailing
  // partial line, handed back via trailStart for the carry.
  sinkWrite(s.out, buf, runStart, lineStart)
  return { lastSnapStart, lastSnapEnd, trailStart: lineStart }
}
// Records the most recent attr-snap for EOF reordering. A snap found fully
// inside buf wins over a straddling one (it appears later in the file).
// carryBuf is still valid here: captureCarry runs after this call.
function captureSnap(
  s: LoadState,
  buf: Buffer,
  chunk: Buffer,
  lastSnapStart: number,
  lastSnapEnd: number,
): void {
  if (lastSnapStart !== -1) {
    // In-buf snap: copy into the reusable lastSnapBuf (regrown as needed).
    s.lastSnapLen = lastSnapEnd - lastSnapStart
    if (s.lastSnapBuf === undefined || s.lastSnapLen > s.lastSnapBuf.length) {
      s.lastSnapBuf = Buffer.allocUnsafe(s.lastSnapLen)
    }
    buf.copy(s.lastSnapBuf, 0, lastSnapStart, lastSnapEnd)
    s.lastSnapSrc = s.lastSnapBuf
  } else if (s.straddleSnapCarryLen > 0) {
    // Straddling snap: stitch the carried half and the chunk tail together.
    s.lastSnapLen = s.straddleSnapCarryLen + s.straddleSnapTailEnd
    if (s.lastSnapBuf === undefined || s.lastSnapLen > s.lastSnapBuf.length) {
      s.lastSnapBuf = Buffer.allocUnsafe(s.lastSnapLen)
    }
    s.carryBuf!.copy(s.lastSnapBuf, 0, 0, s.straddleSnapCarryLen)
    chunk.copy(s.lastSnapBuf, s.straddleSnapCarryLen, 0, s.straddleSnapTailEnd)
    s.lastSnapSrc = s.lastSnapBuf
  }
}
/** Stashes the unterminated trailing bytes buf[trailStart..] for the next chunk. */
function captureCarry(s: LoadState, buf: Buffer, trailStart: number): void {
  const remaining = buf.length - trailStart
  s.carryLen = remaining
  if (remaining <= 0) return
  if (s.carryBuf === undefined || remaining > s.carryBuf.length) {
    s.carryBuf = Buffer.allocUnsafe(remaining)
  }
  buf.copy(s.carryBuf, 0, trailStart, buf.length)
}
/**
 * Flushes the carried final line (unless it is an attr-snap, which becomes
 * the surviving snapshot instead) and then appends the surviving attr-snap —
 * on its own line — at the end of the output.
 */
function finalizeOutput(s: LoadState): void {
  if (s.carryLen > 0) {
    const carried = s.carryBuf!
    if (hasPrefix(carried, ATTR_SNAP_PREFIX, 0, s.carryLen)) {
      // Trailing attr-snap with no newline (crash-truncated file) wins.
      s.lastSnapSrc = carried
      s.lastSnapLen = s.carryLen
    } else {
      sinkWrite(s.out, carried, 0, s.carryLen)
    }
  }
  if (s.lastSnapSrc === null) return
  // Make sure the reordered snap starts on a fresh line.
  if (s.out.len > 0 && s.out.buf[s.out.len - 1] !== LF) {
    sinkWrite(s.out, LF_BYTE, 0, 1)
  }
  sinkWrite(s.out, s.lastSnapSrc, 0, s.lastSnapLen)
}
/**
 * Single forward chunked read of a session transcript for the --resume load
 * path. Attribution-snapshot lines are stripped as chunks are scanned; a
 * confirmed compact boundary resets the output so only post-boundary content
 * survives. The last attr-snap is re-appended at EOF.
 *
 * @param filePath - path to the session JSONL file
 * @param fileSize - its size in bytes (already known to the caller)
 * @returns the boundary's file offset, the filtered transcript bytes, and
 *          whether the effective boundary carried a preservedSegment
 */
export async function readTranscriptForLoad(
  filePath: string,
  fileSize: number,
): Promise<{
  boundaryStartOffset: number
  postBoundaryBuf: Buffer
  hasPreservedSegment: boolean
}> {
  const boundaryMarker = compactBoundaryMarker()
  const CHUNK_SIZE = TRANSCRIPT_READ_CHUNK_SIZE
  const s: LoadState = {
    out: {
      // Gated callers enter with fileSize > 5MB, so min(fileSize, 8MB) lands
      // in [5, 8]MB; large boundaryless sessions (24-31MB output) take 2
      // grows. Ungated callers (attribution.ts) pass small files too — the
      // min just right-sizes the initial buf, no grows.
      buf: Buffer.allocUnsafe(Math.min(fileSize, 8 * 1024 * 1024)),
      len: 0,
      // +1: finalizeOutput may insert one LF between a non-LF-terminated
      // carry and the reordered last attr-snap (crash-truncated file).
      cap: fileSize + 1,
    },
    boundaryStartOffset: 0,
    hasPreservedSegment: false,
    lastSnapSrc: null,
    lastSnapLen: 0,
    lastSnapBuf: undefined,
    bufFileOff: 0,
    carryLen: 0,
    carryBuf: undefined,
    straddleSnapCarryLen: 0,
    straddleSnapTailEnd: 0,
  }
  const chunk = Buffer.allocUnsafe(CHUNK_SIZE)
  const fd = await fsOpen(filePath, 'r')
  try {
    let filePos = 0
    while (filePos < fileSize) {
      const { bytesRead } = await fd.read(
        chunk,
        0,
        Math.min(CHUNK_SIZE, fileSize - filePos),
        filePos,
      )
      // A zero-byte read means no more data — stop instead of looping.
      if (bytesRead === 0) break
      filePos += bytesRead
      // First consume any line straddling the previous chunk seam.
      const chunkOff = processStraddle(s, chunk, bytesRead)
      let buf: Buffer
      if (s.carryLen > 0) {
        // Carry was not consumed by processStraddle — prepend it to this chunk.
        const bufLen = s.carryLen + (bytesRead - chunkOff)
        buf = Buffer.allocUnsafe(bufLen)
        s.carryBuf!.copy(buf, 0, 0, s.carryLen)
        chunk.copy(buf, s.carryLen, chunkOff, bytesRead)
      } else {
        buf = chunk.subarray(chunkOff, bytesRead)
      }
      const r = scanChunkLines(s, buf, boundaryMarker)
      captureSnap(s, buf, chunk, r.lastSnapStart, r.lastSnapEnd)
      captureCarry(s, buf, r.trailStart)
      s.bufFileOff += r.trailStart
    }
    finalizeOutput(s)
  } finally {
    await fd.close()
  }
  return {
    boundaryStartOffset: s.boundaryStartOffset,
    postBoundaryBuf: s.out.buf.subarray(0, s.out.len),
    hasPreservedSegment: s.hasPreservedSegment,
  }
}