Compare commits: 47015d8f40 ... main (16 commits)

Commits: cd4838f692, 25612ab51c, 9df1547fce, 6efed770d0, 5f485ac73a, 6a58b8f48d, 9538f78198, 4059d6d868, 1a2358387f, b4d5966550, 008126c5f4, 7021ab967c, dcb9ae2f68, e5f1227948, 3074606ea5, c5b4b15559
README.md (22 changes)
````diff
@@ -1,4 +1,24 @@
 # valDict
 A very simple computer readable online dictionary based entirely off of data from **[Wiktionary](https://en.wiktionary.org/wiki/Wiktionary:Main_Page)** that uses data made with **[Wiktextract](https://github.com/tatuylonen/wiktextract)**.
 
-valDict can be served using the built in react server that comes with this repo, or your choice of web server.
+valDict can be served using your choice of web server.
+
+## Building
+
+Firstly, you need the "large_dir" option enabled on your filesystem; "dir_index" will also help speed things up. Both can be enabled with:
+
+```bash
+tune2fs -O large_dir /dev/DEVICE
+tune2fs -O dir_index /dev/DEVICE
+
+# DEVICE can be found with lsblk
+```
+
+IN ADDITION, you need a LOT of inodes, on the order of 10 million. I highly recommend making a DEDICATED ext4 partition JUST FOR this dictionary with an outrageous inode count (remember to set large_dir and dir_index on the new filesystem).
+
+Personally I recommend 40-45 GB: 45,000,000,000 bytes / 10,000,000 goal inodes = 4,500 bytes per inode (waow, that is crazy).
+This is a decently efficient way to partition while still leaving a very decent amount of space for extra files or whatever future expansion may be required. Making a filesystem like this would probably mean first creating an ext4 partition with fdisk, then running mkfs.ext4 -i 3200 to get a 3,200 bytes-per-inode ratio...
+
+Building valDict is very easy: just clone the repo and run `./downloadDict.sh`, then run `./cleanbuild.sh` (beware, building takes a LONG time... it has to write ~20 GB worth of files).
+
+RUN AT YOUR OWN RISK. BE CAREFUL.
````
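The inode arithmetic in the new Building section is easy to sanity-check. The sketch below is purely illustrative: it just replays the 45 GB / 10,000,000-inode figures and the `mkfs.ext4 -i 3200` ratio quoted above; none of it is part of the repo.

```javascript
// Illustrative re-check of the README's inode sizing (not part of the repo).
const partitionBytes = 45_000_000_000; // ~45 GB dedicated ext4 partition
const inodeGoal = 10_000_000;          // rough number of inodes the dictionary needs

// Bytes-per-inode ratio that hits the goal exactly: 4500
console.log(partitionBytes / inodeGoal);

// Inodes actually created by `mkfs.ext4 -i 3200` on that partition: 14062500,
// comfortably above the 10 million goal.
console.log(Math.floor(partitionBytes / 3200));
```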
```diff
@@ -1,3 +1,3 @@
-rm -rf ./dictionary/
+rm -rfv ./dictionary/
 
 node gendictionary
```
```diff
@@ -1,6 +1,9 @@
 const fs = require('fs');
 const rl = require('readline')
 
+const trie = true;
+const trieLevel = 4; //size between cuts for trie ex 4 : "/exam/ple"
+
 const dictPath = "./dictionary/";
 const language = "en"
 
@@ -16,9 +19,9 @@ reader.on('line', (line) => {
     iter++;
     console.log("Iteration Number: "+iter.toString());
     let entry = JSON.parse(line);
-    let thispath = path + entry.word + "/" + entry.pos + "/";
+    let thispath = path + getPath(entry.word) + entry.pos + "/";
     console.log(thispath);
-    if (!fs.existsSync(thispath)) {
+    if (!fs.existsSync(thispath + "definitions.json")) {
         initializeDir(thispath);
     }
     // console.log(entry);
@@ -29,10 +32,26 @@ reader.on('line', (line) => {
 
 });
 
-async function writeThesaurus(thispath, entry) {
+function getPath(word){
+    let path = "";
+    if (trie){
+        for (let i = 0; i < word.length; i+=trieLevel){
+            for (let n = 0; n < trieLevel; n++){
+                path += word[i+n] ?? "";
+            }
+            path += "/";
+        }
+    } else {
+        path = word + '/';
+    }
+
+    return path.toLowerCase();
+}
+
+function writeThesaurus(thispath, entry) {
 
 }
-async function writeSounds(thispath, entry) {
+function writeSounds(thispath, entry) {
     var sounds = JSON.parse(fs.readFileSync(thispath+"sounds.json",'utf-8'));
 
     if (entry.sounds == null){
@@ -51,8 +70,9 @@ async function writeSounds(thispath, entry) {
         }
     });
     fs.writeFileSync(thispath+"sounds.json",JSON.stringify(sounds));
+    sounds = null;
 }
-async function writeDefinitions(thispath, entry) {
+function writeDefinitions(thispath, entry) {
 
     var definitions = JSON.parse(fs.readFileSync(thispath+"definitions.json",'utf-8'));
 
@@ -67,7 +87,7 @@ async function writeDefinitions(thispath, entry) {
         definitions.glosses.push(ele.glosses);
     });
     fs.writeFileSync(thispath+"definitions.json", JSON.stringify(definitions));
-
+    definitions = null;
 }
 function initializeDir(path) {
     fs.mkdirSync(path, {recursive:true});
```
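For reference, here is the new `getPath` helper from the diff above pulled out into a standalone snippet, with a couple of sample calls showing the directory layout it produces when `trie` is enabled and `trieLevel` is 4. The sample words and the final path comment are illustrative, not taken from the repo.

```javascript
// Standalone copy of the getPath helper added in the diff above,
// with the trie settings from the top of the file (trie = true, trieLevel = 4).
const trie = true;
const trieLevel = 4;

function getPath(word){
    let path = "";
    if (trie){
        for (let i = 0; i < word.length; i+=trieLevel){
            for (let n = 0; n < trieLevel; n++){
                path += word[i+n] ?? ""; // a short final chunk simply stops at the end of the word
            }
            path += "/";
        }
    } else {
        path = word + '/';
    }

    return path.toLowerCase();
}

// Sample words, made up for illustration:
console.log(getPath("example")); // "exam/ple/"
console.log(getPath("cat"));     // "cat/"
// In the reader callback this gets combined with the part of speech, so (assuming
// `path` points at the dictionary root) an entry would land under e.g. ".../exam/ple/noun/".
```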