Use an LLM Gateway to Augment Content
Covered
Ollama
MSTY
Fabric
javascript
// Node built-ins: child_process for shelling out to `fabric`, fs/path for output files.
const { exec } = require('child_process');
const util = require('util');
const fs = require('fs');
const path = require('path');
// Promise-based wrapper so exec can be awaited (and raced against a timeout).
const execPromise = util.promisify(exec);
// All generated markdown (results, logs, error logs) lands in this directory.
const outputDirectory = 'src/data/01_lossless-run';
/**
 * Extract the video ID from a YouTube URL.
 * Handles youtu.be short links plus youtube.com /watch (?v= anywhere in the
 * query), /embed/, /v/, /shorts/ and /live/ paths.
 * @param {string} url - A YouTube video URL.
 * @returns {string|null} The video ID, or null if none could be found.
 */
function extractVideoId(url) {
  // Capture everything after the ID marker up to the next '?' or '&'.
  const regex = /(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|shorts\/|live\/|watch\?v=|watch\?.+&v=))([^?&]+)/;
  const matches = url.match(regex);
  return matches ? matches[1] : null;
}
/**
 * Read src/content/data/youtube-urls.json and collect every unique YouTube URL.
 * Each JSON entry may carry a `youtube_urls` array; entries without one are skipped.
 * @returns {string[]} Unique URLs in first-seen order; [] if the file is
 *   missing, unreadable, or not valid JSON.
 */
function getYoutubeUrls() {
  try {
    console.log('Attempting to read youtube-urls.json...');
    // The path below is relative, so log cwd to make "file not found" easy to debug.
    console.log('Current working directory:', process.cwd());
    const filePath = 'src/content/data/youtube-urls.json';
    console.log('Looking for file at:', path.resolve(filePath));
    if (!fs.existsSync(filePath)) {
      console.error('Error: youtube-urls.json file not found');
      return [];
    }
    const fileContent = fs.readFileSync(filePath, 'utf8');
    console.log('File read successfully, parsing JSON...');
    const jsonData = JSON.parse(fileContent);
    console.log(`Found ${jsonData.length} entries in JSON file`);
    // flatMap + Set: O(n) flatten/dedupe replacing the original O(n^2)
    // reduce-with-spread + indexOf pattern; preserves first-seen order.
    const allUrls = [...new Set(jsonData.flatMap((item) => item.youtube_urls || []))];
    console.log(`Extracted ${allUrls.length} unique YouTube URLs`);
    return allUrls;
  } catch (error) {
    console.error('Error in getYoutubeUrls:', error);
    return [];
  }
}
/**
 * Run `fabric` with the extract_lossless_essentials pattern against one
 * YouTube URL, writing the transcript-essentials markdown (with YAML front
 * matter) into outputDirectory. stderr, if any, is saved to a timestamped log.
 * @param {string} url - YouTube video URL to process.
 * @returns {Promise<string|null>} Path of the written output file, or null on
 *   any failure (bad URL, fabric error, or 3-minute timeout).
 */
async function processYoutubeUrl(url) {
  console.log(`Processing URL: ${url}`);
  const thisYoutubeUrl = url;
  const frontMatterMark = `---`;
  const TIMEOUT_MS = 180000; // 3 minutes in milliseconds
  let timeoutId; // kept so the pending timer can be cleared once the race settles
  try {
    const videoId = extractVideoId(url);
    if (!videoId) {
      throw new Error(`Could not extract video ID from URL: ${url}`);
    }
    const frontMatterOutput =
      `${frontMatterMark}\nthis_video_url: ${thisYoutubeUrl}\nthis_video_id: ${videoId}\n${frontMatterMark}`;
    const outputFile = path.join(outputDirectory, `juice_from_${videoId}.md`);
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const logFile = path.join(outputDirectory, `${videoId}_log_${timestamp}.md`);
    // Quote the URL so shell metacharacters (&, ;, $, ...) in it cannot
    // inject extra commands — the URL comes from an external JSON file.
    const command = `fabric -y "${url}" --stream --pattern extract_lossless_essentials`;
    console.log(`Executing: ${command}`);
    console.log(`Saving output to: ${outputFile}`);
    // Promise that rejects after TIMEOUT_MS; timer handle saved for cleanup.
    const timeoutPromise = new Promise((_, reject) => {
      timeoutId = setTimeout(() => {
        reject(new Error(`Operation timed out after ${TIMEOUT_MS / 1000} seconds`));
      }, TIMEOUT_MS);
    });
    // Race the fabric run against the timeout.
    const { stdout, stderr } = await Promise.race([
      execPromise(command),
      timeoutPromise
    ]);
    // Fix: declared with const (original assignment leaked an implicit global).
    const fullFileOutput = `${frontMatterOutput}\n${stdout}`;
    fs.writeFileSync(outputFile, fullFileOutput);
    if (stderr) {
      fs.writeFileSync(logFile, stderr);
      console.log(`Saved error logs to: ${logFile}`);
    }
    console.log(`Finished processing: ${url}`);
    return outputFile;
  } catch (error) {
    console.error(`Error processing URL ${url}:`, error.message);
    const errorLogFile = path.join(
      outputDirectory,
      `error_${new Date().toISOString().replace(/[:.]/g, '-')}.log`
    );
    fs.writeFileSync(errorLogFile, `URL: ${url}\nError: ${error.message}\n${error.stack}`);
    console.error(`Error details written to: ${errorLogFile}`);
    // If it's a timeout, kill the (presumably hung) fabric process.
    if (error.message.includes('timed out')) {
      try {
        // Best-effort: kill any running fabric process.
        exec('pkill -f fabric');
        console.log('Killed hanging fabric process');
      } catch (killError) {
        console.error('Error killing fabric process:', killError);
      }
    }
    return null;
  } finally {
    // Fix: without this, the still-pending 3-minute timer keeps the event
    // loop alive long after a fast success or failure.
    clearTimeout(timeoutId);
  }
}
/**
 * Entry point: load every YouTube URL from the data file and process each
 * one in turn. Processing is deliberately sequential so only a single
 * fabric invocation runs at a time.
 */
async function main() {
  const urls = getYoutubeUrls();
  console.log(`Starting to process ${urls.length} URLs...`);
  let position = 0;
  while (position < urls.length) {
    await processYoutubeUrl(urls[position]);
    position += 1;
  }
  console.log('Finished processing all URLs');
}
// Execute the main function.
// Any rejection escaping main() is logged and the process exits non-zero,
// so wrapping shell scripts / CI can detect the failure.
main().catch(error => {
console.error('Error in main execution:', error);
process.exit(1);
});