We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents 4642a80 + b6dc764 commit bec4078 (Copy full SHA for bec4078)
2 files changed
configs/japanese_job_v2_oscar2323_0-100.yaml
@@ -0,0 +1,21 @@
1
+base_dir: 'PROJECT_DIR/datasets/'
2
+targets:
3
+ - "oscar2323_0-100"
4
+output_dir: 'PROJECT_DIR/datasets/oscar2323_0-100_QFv2'
5
+
6
+n_dist: 128
7
+n_output: 1
8
+is_cluster: True
9
+is_local: False
10
+
11
+use_column: "content"
12
+min_doc_len: 50
13
+max_doc_len: 100000
14
+min_mean_word_len: 1
15
+max_mean_word_len: 10
16
+symbol_to_word_ratio: 0.1
17
+bullet_point_ratio: 0.9
18
+ellipsis_ratio: 0.3
19
+japanese_word_ratio: 0.8
20
+freq_char_cnt: 1
21
+separator_ratio: 0.1
requirements-ja.txt
@@ -5,4 +5,5 @@ bs4
html2text
python-stdnum
numpy
-SudachiPy==0.5.4
+SudachiPy==0.5.4
+SudachiDict-core
0 commit comments