Compare commits

..

16 Commits

Author SHA1 Message Date
cd4838f692 Update README.md 2025-06-29 04:21:52 +00:00
25612ab51c Update README.md 2025-06-28 22:32:40 +00:00
9df1547fce fixed a edge case bug where existence check would succed even when it shouldnt when using the trie 2025-06-28 13:15:10 -04:00
6efed770d0 optimized trie and added optional configuration :3 2025-06-28 12:53:03 -04:00
5f485ac73a Update README.md 2025-06-28 16:08:37 +00:00
6a58b8f48d Update README.md 2025-06-28 07:36:46 +00:00
9538f78198 Update README.md 2025-06-28 07:32:55 +00:00
4059d6d868 Update README.md 2025-06-28 07:10:02 +00:00
1a2358387f Update README.md 2025-06-28 07:01:08 +00:00
b4d5966550 path splitting 2025-06-28 02:31:30 -04:00
008126c5f4 Update README.md 2025-06-28 03:43:14 +00:00
7021ab967c made cleanbuild more verbose 2025-06-25 05:12:09 -04:00
dcb9ae2f68 optimized compiler for GC 2025-06-25 04:32:13 -04:00
e5f1227948 Update README.md 2025-06-25 07:04:10 +00:00
3074606ea5 Update README.md 2025-06-25 06:46:17 +00:00
c5b4b15559 Update README.md 2025-06-25 06:44:25 +00:00
3 changed files with 48 additions and 8 deletions

View File

@@ -1,4 +1,24 @@
# valDict
A very simple computer readable online dictionary based entirely off of data from **[Wiktionary](https://en.wiktionary.org/wiki/Wiktionary:Main_Page)** that uses data made with **[Wiktextract](https://github.com/tatuylonen/wiktextract)**.
valDict can be served using the built in react server that comes with this repo, or your choice of web server.
valDict can be served using your choice of web server.
## Building
Firstly, you need the "large_dir" option enabled in your filesystem and "dir_index" will also help speed things up, they can be enabled with
```bash
tune2fs -O large_dir /dev/DEVICE
tune2fs -O dir_index /dev/DEVICE
#DEVICE can be found with lsblk
```
IN ADDITION, you need like... A LOT of inodes, like 10 million. i highly recommend making a DEDICATED ext4 partition JUST FOR this dictionary that has an outrageous inode count (remember to set the large_dir and dir_index options in the new fs)
personally i recommend a 40-45GB that is 45,000,000,000 bytes / 10,000,000 goal inodes = 4,500 bytes per inode (waow that is crazy)
this is a decently efficient way to partition the format while still leaving a very decent amount of space left for extra files, or whatever future expansion may be required. making a filesystem like this would probably require first making an ext4 partition with fdisk, then using mkfs.ext4 -i 3200 to achieve the 3200 bytes per inode ratio...
building valDict is very easy, just clone the repo and run: `./downloadDict.sh` then run `./cleanbuild.sh` (beware, building takes a LONGGGG time... (has to write 20gb~ worth of files))
RUN AT OWN RISK BE CAREFUL CAREFUL CAREFUL

View File

@@ -1,3 +1,3 @@
rm -rf ./dictionary/
rm -rfv ./dictionary/
node gendictionary

View File

@@ -1,6 +1,9 @@
const fs = require('fs');
const rl = require('readline')
// When true, getPath() cuts each word into fixed-size directory segments
// instead of using a single directory named after the whole word.
const trie = true;
// Segment size (in characters) between cuts in trie mode,
// e.g. 4 turns "example" into "exam/ple/".
const trieLevel = 4;
// NOTE(review): presumably the root directory the generated dictionary is
// written to — its use is outside this view, confirm.
const dictPath = "./dictionary/";
// NOTE(review): presumably the language code of the entries being processed —
// its use is outside this view, confirm.
const language = "en"
@@ -16,9 +19,9 @@ reader.on('line', (line) => {
iter++;
console.log("Iteration Number: "+iter.toString());
let entry = JSON.parse(line);
let thispath = path + entry.word + "/" + entry.pos + "/";
let thispath = path + getPath(entry.word) + entry.pos + "/";
console.log(thispath);
if (!fs.existsSync(thispath)) {
if (!fs.existsSync(thispath + "definitions.json")) {
initializeDir(thispath);
}
// console.log(entry);
@@ -29,10 +32,26 @@ reader.on('line', (line) => {
});
async function writeThesaurus(thispath, entry) {
/**
 * Builds the relative directory path (ending in "/") for a word.
 *
 * In trie mode the word is cut into consecutive segments of `level` UTF-16
 * code units, each followed by "/", e.g. level 4 turns "example" into
 * "exam/ple/". Otherwise the whole word becomes a single directory name.
 * The result is always lower-cased.
 *
 * Note: segments are cut on UTF-16 code units, so a surrogate pair may be
 * split across two segments. This matches the existing on-disk layout and is
 * kept so that previously generated paths stay valid.
 *
 * @param {string} word - The word to convert into a path.
 * @param {boolean} [useTrie=trie] - Whether to split the word into segments.
 * @param {number} [level=trieLevel] - Segment length used in trie mode; must
 *   be a positive integer.
 * @returns {string} Lower-cased relative path; "" for an empty word in trie mode.
 * @throws {RangeError} If trie mode is on and `level` is not a positive
 *   integer (a zero or negative step would never terminate).
 */
function getPath(word, useTrie = trie, level = trieLevel){
    if (!useTrie){
        return (word + '/').toLowerCase();
    }
    // A step of 0 or below would loop forever; a fractional step would index
    // the word at non-integer positions and silently drop characters.
    if (!Number.isInteger(level) || level < 1){
        throw new RangeError(`trieLevel must be a positive integer, got: ${level}`);
    }
    let path = "";
    for (let i = 0; i < word.length; i += level){
        // slice() clamps at the end of the string, so the last segment may be shorter.
        path += word.slice(i, i + level) + "/";
    }
    return path.toLowerCase();
}
/**
 * Placeholder for thesaurus output: the body is currently empty, so calling
 * this does nothing with either argument.
 *
 * @param {string} thispath - Unused at present.
 * @param {object} entry - Unused at present.
 */
function writeThesaurus(thispath, entry) {
}
async function writeSounds(thispath, entry) {
function writeSounds(thispath, entry) {
var sounds = JSON.parse(fs.readFileSync(thispath+"sounds.json",'utf-8'));
if (entry.sounds == null){
@@ -51,8 +70,9 @@ async function writeSounds(thispath, entry) {
}
});
fs.writeFileSync(thispath+"sounds.json",JSON.stringify(sounds));
sounds = null;
}
async function writeDefinitions(thispath, entry) {
function writeDefinitions(thispath, entry) {
var definitions = JSON.parse(fs.readFileSync(thispath+"definitions.json",'utf-8'));
@@ -67,7 +87,7 @@ async function writeDefinitions(thispath, entry) {
definitions.glosses.push(ele.glosses);
});
fs.writeFileSync(thispath+"definitions.json", JSON.stringify(definitions));
definitions = null;
}
function initializeDir(path) {
fs.mkdirSync(path, {recursive:true});