-
Notifications
You must be signed in to change notification settings - Fork 1
/
2-train.js
78 lines (62 loc) · 3.06 KB
/
2-train.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
// 1. Import necessary modules and libraries
import { OpenAI } from 'langchain/llms';
import { RetrievalQAChain } from 'langchain/chains';
import { HNSWLib } from 'langchain/vectorstores';
import { OpenAIEmbeddings } from 'langchain/embeddings';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import * as fs from 'fs';
import * as dotenv from 'dotenv';
// 2. Load environment variables (OPENAI_API_KEY is expected in .env)
dotenv.config();
// 3. Set up input data and paths
const folderPath = "./chart2data"; // All source .txt files live in this folder
const chart =` [<table>#WI 352.001|Method of Cleaning] -> [<start>start] ->[<usecase> operator]-> [pre-soak 10 minutes | cleaning at 40ºC]->
[<end>]
[<start>start] ->[<usecase> inspector] -> [temperature range specified | flushing specifications]->[<end>]
`
// Prompt sent to the QA chain: the diagram above plus an instruction to emit source output
const request = `DIAGRAM input: """ ${chart}""" please generate SOURCE output.`;
const VECTOR_STORE_PATH = `chart2data.index`; // On-disk HNSW index built from the whole folder
// 4. Define the main function handleEmbeddings
/**
 * Build (or load) an HNSW vector store from the .txt files in `folderPath`,
 * run a retrieval-augmented QA query with `request`, and persist the answer
 * to output_2.txt as a single line.
 *
 * Reads: OPENAI_API_KEY from the environment (via the OpenAI/embeddings
 * clients), the `folderPath` directory, and `VECTOR_STORE_PATH` on disk.
 * Writes: `VECTOR_STORE_PATH` (first run only) and output_2.txt.
 *
 * @returns {Promise<void>}
 */
export const handleEmbeddings = async () => {
  // Default-configured model; the API key comes from the environment.
  const model = new OpenAI({});

  // Reuse the persisted index when present — embedding is the expensive step.
  let vectorStore;
  if (fs.existsSync(VECTOR_STORE_PATH)) {
    console.log('Vector Exists..');
    vectorStore = await HNSWLib.load(VECTOR_STORE_PATH, new OpenAIEmbeddings());
  } else {
    // Read every .txt file in the input folder into memory.
    const files = fs.readdirSync(folderPath).filter((file) => file.endsWith('.txt'));
    const allTexts = files.map((file) =>
      fs.readFileSync(`${folderPath}/${file}`, 'utf8'),
    );

    // Chunk the texts so each piece fits the embedding context window.
    const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
    const docs = await textSplitter.createDocuments(allTexts);

    // Embed all chunks and persist the index for subsequent runs.
    vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings());
    await vectorStore.save(VECTOR_STORE_PATH);
  }

  // Retrieval QA: fetch relevant chunks, then answer `request` with the model.
  const chain = RetrievalQAChain.fromLLM(model, vectorStore.asRetriever());
  const res = await chain.call({ query: request });
  console.log({ res });

  // Collapse the answer onto one line and save it.
  const cleanedText = res.text.split('\n').join(' ');
  fs.writeFileSync('output_2.txt', cleanedText);
};
// 10. Execute the main function; catch the promise so a failure is reported
// with a non-zero exit code instead of becoming an unhandled rejection.
handleEmbeddings().catch((err) => {
  console.error('handleEmbeddings failed:', err);
  process.exitCode = 1;
});