// File: wifi-densepose/vendor/ruvector/npm/packages/rvdna/index.js
// (file-viewer metadata: 393 lines, 14 KiB, JavaScript)

// Capture the OS platform and CPU architecture of the running process once.
const platform = process.platform;
const arch = process.arch;

// Lookup table: OS platform -> CPU architecture -> name of the npm package
// that ships the prebuilt native binary for that combination.
const platformMap = {
  linux: {
    x64: '@ruvector/rvdna-linux-x64-gnu',
    arm64: '@ruvector/rvdna-linux-arm64-gnu',
  },
  darwin: {
    x64: '@ruvector/rvdna-darwin-x64',
    arm64: '@ruvector/rvdna-darwin-arm64',
  },
  win32: {
    x64: '@ruvector/rvdna-win32-x64-msvc',
  },
};
function loadNativeModule() {
const platformPackage = platformMap[platform]?.[arch];
if (!platformPackage) {
throw new Error(
`Unsupported platform: ${platform}-${arch}\n` +
`@ruvector/rvdna native bindings are available for:\n` +
`- Linux (x64, ARM64)\n` +
`- macOS (x64, ARM64)\n` +
`- Windows (x64)\n\n` +
`For other platforms, use the WASM build: npm install @ruvector/rvdna-wasm`
);
}
try {
return require(platformPackage);
} catch (error) {
if (error.code === 'MODULE_NOT_FOUND') {
throw new Error(
`Native module not found for ${platform}-${arch}\n` +
`Please install: npm install ${platformPackage}\n` +
`Or reinstall @ruvector/rvdna to get optional dependencies`
);
}
throw error;
}
}
// Prefer the native bindings. If loading fails for any reason, keep `null` so
// the wrappers below fall back to their pure-JS implementations (or throw
// for operations that exist only natively).
let nativeModule = null;
try {
  nativeModule = loadNativeModule();
} catch {
  // Deliberate best-effort: a missing native binding is not fatal here.
  nativeModule = null;
}
// -------------------------------------------------------------------
// Public API — wraps native bindings or provides JS fallbacks
// -------------------------------------------------------------------
/**
 * Encode a DNA string to 2-bit packed bytes (4 bases per byte).
 * A=00, C=01, G=10, T=11; the first base occupies the two most significant
 * bits of each byte. Delegates to the native binding when it is available.
 *
 * The JS fallback is case-insensitive ("acgt" encodes like "ACGT"). Any other
 * character (including N) has no 2-bit code and is stored as 00, i.e. it
 * decodes back as 'A'.
 *
 * @param {string} sequence - DNA sequence
 * @returns {Buffer} Packed bytes, `Math.ceil(sequence.length / 4)` long
 */
function encode2bit(sequence) {
  if (nativeModule?.encode2bit) return nativeModule.encode2bit(sequence);

  // JS fallback. Lower-case keys keep lower-case input from silently
  // collapsing to all-A.
  const baseCodes = { A: 0, C: 1, G: 2, T: 3, a: 0, c: 1, g: 2, t: 3 };
  const len = sequence.length;
  const buf = Buffer.alloc(Math.ceil(len / 4));
  for (let i = 0; i < len; i++) {
    const byteIdx = i >> 2;
    // Bases fill each byte from the high bits down: offsets 6, 4, 2, 0.
    const bitOff = 6 - (i & 3) * 2;
    buf[byteIdx] |= (baseCodes[sequence[i]] || 0) << bitOff;
  }
  return buf;
}
/**
 * Decode 2-bit packed bytes back to a DNA string (inverse of the 2-bit
 * packing: 00=A, 01=C, 10=G, 11=T, first base in the high bits).
 * Delegates to the native binding when it is available.
 *
 * @param {Buffer|Uint8Array} buffer - Packed bases
 * @param {number} length - Number of bases to decode
 * @returns {string} Decoded sequence of `length` characters
 */
function decode2bit(buffer, length) {
  if (nativeModule?.decode2bit) return nativeModule.decode2bit(buffer, length);

  const alphabet = 'ACGT';
  const decoded = [];
  for (let pos = 0; pos < length; pos += 1) {
    const packedByte = buffer[Math.floor(pos / 4)];
    // Slot 0 of each byte lives in bits 7-6, slot 3 in bits 1-0.
    const shift = (3 - (pos % 4)) * 2;
    decoded.push(alphabet[(packedByte >> shift) & 3]);
  }
  return decoded.join('');
}
/**
 * Build the standard genetic code as a codon -> amino-acid lookup.
 * The table is written in the compact TCAG layout: codon number n has bases
 * TCAG[n >> 4], TCAG[(n >> 2) & 3], TCAG[n & 3]; '*' marks a stop codon.
 */
function buildStandardCodonTable() {
  const bases = 'TCAG';
  const aminoAcids = [
    'FFLLSSSSYY**CC*W', // T..
    'LLLLPPPPHHQQRRRR', // C..
    'IIIMTTTTNNKKSSRR', // A..
    'VVVVAAAADDEEGGGG', // G..
  ].join('');
  const table = new Map();
  for (let n = 0; n < 64; n += 1) {
    const codon = bases[n >> 4] + bases[(n >> 2) & 3] + bases[n & 3];
    table.set(codon, aminoAcids[n]);
  }
  return table;
}

const STANDARD_CODON_TABLE = buildStandardCodonTable();

/**
 * Translate a DNA string to a protein amino acid string.
 * Delegates to the native binding when it is available. The JS fallback reads
 * non-overlapping codons from position 0 (case-insensitive), emits 'X' for any
 * codon not in the standard table, stops at the first stop codon (which is not
 * included), and ignores a trailing partial codon.
 *
 * @param {string} sequence - DNA sequence
 * @returns {string} One-letter amino-acid sequence
 */
function translateDna(sequence) {
  if (nativeModule?.translateDna) return nativeModule.translateDna(sequence);

  const residues = [];
  for (let start = 0; start + 3 <= sequence.length; start += 3) {
    const codon = sequence.slice(start, start + 3).toUpperCase();
    const residue = STANDARD_CODON_TABLE.get(codon) ?? 'X';
    if (residue === '*') break;
    residues.push(residue);
  }
  return residues.join('');
}
/**
 * Compute cosine similarity between two numeric arrays.
 * Delegates to the native binding when it is available. The JS fallback
 * iterates over the indices of `a` and returns 0 when either magnitude is
 * zero or not a number.
 *
 * @param {ArrayLike<number>} a - First vector
 * @param {ArrayLike<number>} b - Second vector
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when undefined
 */
function cosineSimilarity(a, b) {
  if (nativeModule?.cosineSimilarity) return nativeModule.cosineSimilarity(a, b);

  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let idx = 0;
  while (idx < a.length) {
    const x = a[idx];
    const y = b[idx];
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    idx += 1;
  }

  const normA = Math.sqrt(sumSquaresA);
  const normB = Math.sqrt(sumSquaresB);
  if (!normA || !normB) {
    return 0;
  }
  return dotProduct / (normA * normB);
}
/**
 * Convert a FASTA sequence string to .rvdna binary format.
 * Native-only: there is no JS fallback for this operation.
 *
 * @param {string} sequence - Sequence passed through to the native binding
 * @param {object} [options] - Optional tuning values forwarded positionally
 * @param {number} [options.k=11] - Forwarded as the 2nd native argument
 * @param {number} [options.dims=512] - Forwarded as the 3rd native argument
 * @param {number} [options.blockSize=500] - Forwarded as the 4th native argument
 * @returns {Buffer} Complete .rvdna file contents, as produced by the native binding
 * @throws {Error} If the native binding does not provide `fastaToRvdna`
 */
function fastaToRvdna(sequence, options = {}) {
  if (!nativeModule?.fastaToRvdna) {
    throw new Error('fastaToRvdna requires native bindings. Install the platform-specific package.');
  }

  // Any falsy option (missing, 0, '') falls back to its default.
  const k = options.k || 11;
  const dims = options.dims || 512;
  const blockSize = options.blockSize || 500;
  return nativeModule.fastaToRvdna(sequence, k, dims, blockSize);
}
/**
 * Read a .rvdna file from a Buffer and return its parsed sections.
 * Native-only: there is no JS fallback for this operation.
 *
 * @param {Buffer} buffer - Contents of a .rvdna file
 * @returns {*} Whatever the native `readRvdna` returns for the buffer
 * @throws {Error} If the native binding does not provide `readRvdna`
 */
function readRvdna(buffer) {
  if (!nativeModule?.readRvdna) {
    throw new Error('readRvdna requires native bindings. Install the platform-specific package.');
  }
  return nativeModule.readRvdna(buffer);
}
/**
 * Report whether the native bindings were loaded.
 *
 * @returns {boolean} `false` only when the module-level `nativeModule` is
 *   exactly `null` (the value used when loading failed); `true` otherwise.
 */
function isNativeAvailable() {
  const isLoaded = nativeModule !== null;
  return isLoaded;
}
// -------------------------------------------------------------------
// 23andMe Genotyping Pipeline (pure JS — mirrors Rust rvdna::genotyping)
// -------------------------------------------------------------------
/**
 * Normalize a genotype string: trim, uppercase, and put a two-character
 * allele pair in ascending character order. Strings of any other length are
 * only trimmed and uppercased.
 * "ag" → "AG", "TC" → "CT", "DI" → "DI"
 *
 * @param {string} gt - Raw genotype
 * @returns {string} Normalized genotype
 */
function normalizeGenotype(gt) {
  const cleaned = gt.trim().toUpperCase();
  if (cleaned.length !== 2) {
    return cleaned;
  }
  const first = cleaned.charAt(0);
  const second = cleaned.charAt(1);
  return first > second ? second + first : cleaned;
}
/**
 * Parse a 23andMe raw data file (v4/v5 tab-separated format).
 *
 * Lines starting with '#' are comments; they are scanned only for a genome
 * build marker. Every other non-blank line with at least four tab-separated
 * columns (rsid, chromosome, position, genotype) counts as one marker.
 * No-calls ("--") are counted but not stored. Both LF and CRLF line endings
 * are accepted.
 *
 * @param {string} text - Raw file contents
 * @returns {{ snps: Map<string,object>, totalMarkers: number, noCalls: number, chrCounts: Map<string,number>, build: string }}
 * @throws {Error} If the file contains no marker lines
 */
function parse23andMe(text) {
  const snps = new Map();
  const chrCounts = new Map();
  let total = 0;
  let noCalls = 0;
  let build = 'Unknown';

  // Split on LF or CRLF so a trailing '\r' never ends up in the last column.
  for (const line of text.split(/\r?\n/)) {
    if (line.startsWith('#')) {
      const lower = line.toLowerCase();
      if (lower.includes('build 37') || lower.includes('grch37') || lower.includes('hg19')) build = 'GRCh37';
      else if (lower.includes('build 38') || lower.includes('grch38') || lower.includes('hg38')) build = 'GRCh38';
      continue;
    }
    if (!line.trim()) continue;

    const parts = line.split('\t');
    if (parts.length < 4) continue;
    const [rsid, chrom, posStr, rawGenotype] = parts;
    // Trim before the no-call test: stray whitespace would otherwise make
    // "--" look like a called genotype.
    const genotype = rawGenotype.trim();
    total++;
    if (genotype === '--') {
      noCalls++;
      continue;
    }

    // Unparseable positions are recorded as 0 rather than NaN.
    const pos = Number.parseInt(posStr, 10) || 0;
    const normGt = normalizeGenotype(genotype);
    chrCounts.set(chrom, (chrCounts.get(chrom) || 0) + 1);
    snps.set(rsid, { rsid, chromosome: chrom, position: pos, genotype: normGt });
  }

  if (total === 0) throw new Error('No markers found in file');
  return { snps, totalMarkers: total, noCalls, chrCounts, build };
}
// CYP defining variant tables.
// Each row ties one rsid to the star allele it defines, the variant call that
// signals it, the allele's activity value, and a human-readable description.
// NOTE(review): `alt` is matched verbatim against the supplied genotype
// strings, so it has to use the same strand convention as the input data —
// confirm against the upstream tables.

/** Row for a single-nucleotide defining variant (`alt` is the variant base). */
function cypSnv(rsid, allele, alt, activity, fn) {
  return { rsid, allele, alt, isDel: false, activity, fn };
}

/** Row for a deletion defining variant (reported as D/I genotypes). */
function cypDeletion(rsid, allele, activity, fn) {
  return { rsid, allele, alt: '-', isDel: true, activity, fn };
}

const CYP2D6_DEFS = [
  cypSnv('rs3892097', '*4', 'T', 0.0, 'No function (splicing defect)'),
  cypDeletion('rs35742686', '*3', 0.0, 'No function (frameshift)'),
  cypDeletion('rs5030655', '*6', 0.0, 'No function (frameshift)'),
  cypSnv('rs1065852', '*10', 'T', 0.5, 'Decreased function'),
  cypSnv('rs28371725', '*41', 'T', 0.5, 'Decreased function'),
  cypSnv('rs28371706', '*17', 'T', 0.5, 'Decreased function'),
];

const CYP2C19_DEFS = [
  cypSnv('rs4244285', '*2', 'A', 0.0, 'No function (splicing defect)'),
  cypSnv('rs4986893', '*3', 'A', 0.0, 'No function (premature stop)'),
  cypSnv('rs12248560', '*17', 'T', 1.5, 'Increased function'),
];
/**
 * Call a CYP diplotype from a genotype map.
 *
 * Walks the defining-variant table in order. Each genotyped site contributes
 * zero, one (heterozygous) or two (homozygous) copies of its star allele.
 * The first two collected alleles form the diplotype; missing slots are
 * filled with the reference allele *1 (activity 1.0). The summed activity of
 * those two alleles selects the phenotype, and the number of genotyped /
 * matched sites selects the confidence label.
 *
 * @param {string} gene - Gene name (e.g., "CYP2D6")
 * @param {object[]} defs - Defining variant table
 * @param {Map<string,string>} gts - rsid → genotype map
 * @returns {object} Diplotype call: gene, allele1, allele2, activity,
 *   phenotype, confidence, rsidsGenotyped, rsidsMatched, rsidsTotal, notes, details
 */
function callCypDiplotype(gene, defs, gts) {
  // How many copies of the defining variant a genotype carries (0, 1 or 2).
  const countVariantCopies = (def, gt) => {
    if (def.isDel) {
      if (gt === 'DD') return 2;
      return gt === 'DI' ? 1 : 0;
    }
    const homozygousAlt = def.alt + def.alt;
    if (gt === homozygousAlt) return 2;
    return gt.includes(def.alt) ? 1 : 0;
  };

  const collected = [];
  const details = [];
  let genotyped = 0;
  let matched = 0;

  for (const def of defs) {
    const gt = gts.get(def.rsid);
    if (gt === undefined) {
      details.push(` ${def.rsid}: not genotyped`);
      continue;
    }
    genotyped += 1;

    const copies = countVariantCopies(def, gt);
    if (copies === 0) {
      details.push(` ${def.rsid}: ${gt} -> reference (no ${def.allele})`);
      continue;
    }

    matched += 1;
    for (let copy = 0; copy < copies; copy += 1) {
      collected.push([def.allele, def.activity]);
    }
    const zygosity = copies === 2 ? 'homozygous' : 'heterozygous';
    details.push(` ${def.rsid}: ${gt} -> ${zygosity} ${def.allele} (${def.fn})`);
  }

  // Confidence depends on how much of the panel was genotyped and matched.
  const coversHalfPanel = genotyped * 2 >= defs.length;
  const rateConfidence = () => {
    if (genotyped === 0) return 'Unsupported';
    if (matched >= 2 && coversHalfPanel) return 'Strong';
    if ((matched >= 1 && genotyped >= 2) || coversHalfPanel) return 'Moderate';
    return 'Weak';
  };
  const confidence = rateConfidence();

  const notes = [];
  switch (confidence) {
    case 'Unsupported':
      notes.push('Panel lacks all defining variants for this gene.');
      break;
    case 'Weak':
      notes.push(`Only ${genotyped}/${defs.length} defining rsids genotyped; call unreliable.`);
      break;
    default:
      break;
  }
  notes.push('No phase or CNV resolution from genotyping array.');

  // Fill unassigned chromosome copies with the reference allele.
  while (collected.length < 2) {
    collected.push(['*1', 1.0]);
  }
  const [first, second] = collected;
  const activity = first[1] + second[1];

  const classifyPhenotype = (score) => {
    if (score > 2.0) return 'UltraRapid';
    if (score >= 1.0) return 'Normal';
    if (score >= 0.5) return 'Intermediate';
    return 'Poor';
  };

  return {
    gene,
    allele1: first[0],
    allele2: second[0],
    activity,
    phenotype: classifyPhenotype(activity),
    confidence,
    rsidsGenotyped: genotyped,
    rsidsMatched: matched,
    rsidsTotal: defs.length,
    notes,
    details,
  };
}
/**
 * Call the CYP2D6 diplotype using the CYP2D6 defining-variant table.
 * @param {Map<string,string>} gts - rsid → genotype map
 * @returns {object} Result of `callCypDiplotype` for gene "CYP2D6"
 */
function callCyp2d6(gts) {
  const gene = 'CYP2D6';
  return callCypDiplotype(gene, CYP2D6_DEFS, gts);
}
/**
 * Call the CYP2C19 diplotype using the CYP2C19 defining-variant table.
 * @param {Map<string,string>} gts - rsid → genotype map
 * @returns {object} Result of `callCypDiplotype` for gene "CYP2C19"
 */
function callCyp2c19(gts) {
  const gene = 'CYP2C19';
  return callCypDiplotype(gene, CYP2C19_DEFS, gts);
}
/**
 * Determine APOE genotype from rs429358 + rs7412.
 *
 * Counts 'C' alleles at rs429358 (treated as e4 copies) and 'T' alleles at
 * rs7412 (treated as e2 copies) and maps the pair of counts to a genotype
 * label. A missing or no-call ("--") genotype at either site gives
 * "Unable to determine (missing data)". Count combinations that do not
 * correspond to a listed genotype (for example two e4 copies together with
 * an e2 copy) are reported as "Unusual" rather than forced into e4/e4.
 *
 * @param {Map<string,string>} gts - rsid → genotype map
 * @returns {{ genotype: string, rs429358: string, rs7412: string }}
 */
function determineApoe(gts) {
  const gt1 = gts.get('rs429358') || '';
  const gt2 = gts.get('rs7412') || '';

  // A no-call carries no allele information; counting its letters would
  // otherwise report it as 0,0 = e3/e3.
  const isMissing = (gt) => !gt || gt === '--';
  if (isMissing(gt1) || isMissing(gt2)) {
    return { genotype: 'Unable to determine (missing data)', rs429358: gt1, rs7412: gt2 };
  }

  const e4 = (gt1.match(/C/g) || []).length;
  const e2 = (gt2.match(/T/g) || []).length;

  // Keyed by "<e4 copies>,<e2 copies>".
  const labels = {
    '0,0': 'e3/e3 (most common, baseline risk)',
    '0,1': "e2/e3 (PROTECTIVE - reduced Alzheimer's risk)",
    '0,2': 'e2/e2 (protective; monitor for type III hyperlipoproteinemia)',
    '1,0': "e3/e4 (increased Alzheimer's risk ~3x)",
    '1,1': 'e2/e4 (mixed - e2 partially offsets e4 risk)',
    '2,0': "e4/e4 (significantly increased Alzheimer's risk ~12x)",
  };
  const geno = labels[`${e4},${e2}`] ?? `Unusual: rs429358=${gt1}, rs7412=${gt2}`;
  return { genotype: geno, rs429358: gt1, rs7412: gt2 };
}
/**
 * Run the full 23andMe analysis pipeline: parse the file, call the CYP2D6 and
 * CYP2C19 diplotypes, determine the APOE genotype, and tally genotype classes.
 *
 * @param {string} text - Raw 23andMe file contents
 * @returns {object} Full analysis result:
 *   - `data`: the parse result with `snps` and `chrCounts` converted from Maps
 *     to plain objects
 *   - `cyp2d6`, `cyp2c19`, `apoe`: results of the respective callers
 *   - `homozygous`, `heterozygous`, `indels`: counts over two-character genotypes
 *   - `hetRatio`: heterozygous calls as a percentage of called markers
 */
function analyze23andMe(text) {
  const parsed = parse23andMe(text);

  // The callers only need rsid → genotype, not the full SNP records.
  const genotypeByRsid = new Map();
  parsed.snps.forEach((snp, rsid) => {
    genotypeByRsid.set(rsid, snp.genotype);
  });

  const cyp2d6 = callCyp2d6(genotypeByRsid);
  const cyp2c19 = callCyp2c19(genotypeByRsid);
  const apoe = determineApoe(genotypeByRsid);

  // Variant classification: only two-character genotypes are tallied. A pair
  // containing anything other than A/C/G/T (e.g. D/I codes) counts as an indel.
  const isNucleotide = (ch) => 'ACGT'.includes(ch);
  let homozygous = 0;
  let heterozygous = 0;
  let indels = 0;
  for (const { genotype } of parsed.snps.values()) {
    if (genotype.length !== 2) continue;
    const [left, right] = [genotype[0], genotype[1]];
    if (!isNucleotide(left) || !isNucleotide(right)) {
      indels += 1;
    } else if (left === right) {
      homozygous += 1;
    } else {
      heterozygous += 1;
    }
  }

  const calledMarkers = parsed.totalMarkers - parsed.noCalls;
  const hetRatio = calledMarkers > 0 ? heterozygous / calledMarkers * 100 : 0;

  return {
    data: {
      ...parsed,
      snps: Object.fromEntries(parsed.snps),
      chrCounts: Object.fromEntries(parsed.chrCounts),
    },
    cyp2d6,
    cyp2c19,
    apoe,
    homozygous,
    heterozygous,
    indels,
    hetRatio,
  };
}
// -------------------------------------------------------------------
// Biomarker Analysis Engine (v0.3.0 — mirrors biomarker.rs + biomarker_stream.rs)
// -------------------------------------------------------------------
const biomarkerModule = require('./src/biomarker');
const streamModule = require('./src/stream');
// Public CommonJS interface of the package. Entries are grouped by the
// release that introduced them; the biomarker and streaming entries are
// re-exported from ./src/biomarker and ./src/stream.
module.exports = {
// Original API: sequence helpers that use the native binding when present
// (fastaToRvdna and readRvdna have no JS fallback and throw without it)
encode2bit,
decode2bit,
translateDna,
cosineSimilarity,
fastaToRvdna,
readRvdna,
isNativeAvailable,
// 23andMe Genotyping API (v0.2.0), implemented in this file in pure JS
normalizeGenotype,
parse23andMe,
callCyp2d6,
callCyp2c19,
determineApoe,
analyze23andMe,
// Biomarker Risk Scoring Engine (v0.3.0), from ./src/biomarker
biomarkerReferences: biomarkerModule.biomarkerReferences,
zScore: biomarkerModule.zScore,
classifyBiomarker: biomarkerModule.classifyBiomarker,
computeRiskScores: biomarkerModule.computeRiskScores,
encodeProfileVector: biomarkerModule.encodeProfileVector,
generateSyntheticPopulation: biomarkerModule.generateSyntheticPopulation,
BIOMARKER_REFERENCES: biomarkerModule.BIOMARKER_REFERENCES,
SNPS: biomarkerModule.SNPS,
INTERACTIONS: biomarkerModule.INTERACTIONS,
CAT_ORDER: biomarkerModule.CAT_ORDER,
// Streaming Biomarker Processor (v0.3.0), from ./src/stream
RingBuffer: streamModule.RingBuffer,
StreamProcessor: streamModule.StreamProcessor,
generateReadings: streamModule.generateReadings,
defaultStreamConfig: streamModule.defaultStreamConfig,
BIOMARKER_DEFS: streamModule.BIOMARKER_DEFS,
// Re-export native module for advanced use; this is null when the
// platform binding could not be loaded
native: nativeModule,
};