forked from SsnAgo/PersonalProject-Java
-
Notifications
You must be signed in to change notification settings - Fork 50
Expand file tree
/
Copy pathLib.java
More file actions
173 lines (164 loc) · 6.28 KB
/
Lib.java
File metadata and controls
173 lines (164 loc) · 6.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
package WordCount;
import java.io.*;
import java.util.*;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * Word-count utilities: character, word and non-blank-line counting plus a
 * top-ten word-frequency report, each appended to a text report file.
 *
 * <p>A <em>token</em> is a maximal run of ASCII letters and digits; every other
 * character is a separator. A <em>word</em> is a token that starts with at
 * least four English letters. Word frequencies are case-insensitive.
 *
 * <p>NOTE(review): the report labels are not uniform ({@code "characters:"} and
 * {@code "lines:"} have no space after the colon, {@code "words: "} does). They
 * are kept exactly as they were because consumers may depend on the format.
 */
public class Lib {

    /** A word: four leading English letters followed by any number of letters/digits. */
    private static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-Z]{4}[a-zA-Z0-9]*");

    /** Maximum number of entries written by {@link #WordsNumSort(String, String)}. */
    private static final int TOP_WORDS_LIMIT = 10;

    /**
     * Opens a file for reading as UTF-8 text.
     *
     * @param fileName path of the file to read
     * @return a buffered reader over the file, or {@code null} (after printing a
     *         message to standard output) when the file cannot be opened
     */
    public static Reader InputFile(String fileName) {
        try {
            // Decode explicitly as UTF-8 so results do not depend on the platform
            // default charset and match the encoding used for the report file.
            return new BufferedReader(new InputStreamReader(
                    new FileInputStream(new File(fileName)),
                    java.nio.charset.StandardCharsets.UTF_8));
        } catch (FileNotFoundException e) {
            System.out.println("找不到输入文件!");
            return null;
        }
    }

    /**
     * Opens a file for appending UTF-8 text.
     *
     * @param fileName path of the file to append to (created if absent)
     * @return a buffered writer positioned at the end of the file
     * @throws IOException if the file cannot be opened for writing
     */
    public static BufferedWriter OutputFile(String fileName) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(new File(fileName), true),
                java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Counts the characters in the input file and writes
     * {@code characters:<n>} as the first line of the report, truncating any
     * previous content of the report file.
     *
     * @param InputFile  path of the file to analyse
     * @param OutputFile path of the report file (overwritten)
     * @return the number of characters read
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws IOException           on any other read/write failure
     */
    public static int CountCharacters(String InputFile, String OutputFile) throws IOException {
        int charactersNum = 0;
        try (Reader reader = openInput(InputFile);
             // Not appending here: this call starts a fresh report.
             Writer writer = new OutputStreamWriter(
                     new FileOutputStream(OutputFile), java.nio.charset.StandardCharsets.UTF_8)) {
            while (reader.read() != -1) {
                charactersNum++;
            }
            writer.write("characters:" + charactersNum + '\n');
        }
        return charactersNum;
    }

    /**
     * Counts the words in the input file and appends {@code words: <n>} to the report.
     *
     * @param inputFile  path of the file to analyse
     * @param outputFile path of the report file (appended to)
     * @return the number of words found
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws IOException           on any other read/write failure
     */
    public static int CountWords(String inputFile, String outputFile) throws IOException {
        int wordsNum = 0;
        try (Reader reader = openInput(inputFile);
             Writer writer = OutputFile(outputFile)) {
            String token;
            while ((token = nextToken(reader)) != null) {
                if (WORD_PATTERN.matcher(token).matches()) {
                    wordsNum++;
                }
            }
            writer.append("words: " + wordsNum + '\n');
        }
        return wordsNum;
    }

    /**
     * Counts the lines that contain at least one character other than space,
     * tab, carriage return or line feed, and appends {@code lines:<n>} to the report.
     *
     * @param inputFile  path of the file to analyse
     * @param outputFile path of the report file (appended to)
     * @return the number of non-blank lines
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws IOException           on any other read/write failure
     */
    public static int CountLines(String inputFile, String outputFile) throws IOException {
        int linesNum = 0;
        try (Reader reader = openInput(inputFile);
             Writer writer = OutputFile(outputFile)) {
            // A flag replaces the former String reference comparison, which
            // counted a blank first line as valid.
            boolean hasContent = false;
            int ch;
            while ((ch = reader.read()) != -1) {
                if (ch == '\n') {
                    if (hasContent) {
                        linesNum++;
                    }
                    hasContent = false;
                } else if (ch != ' ' && ch != '\t' && ch != '\r') {
                    hasContent = true;
                }
            }
            if (hasContent) {
                // Last line without a trailing line feed.
                linesNum++;
            }
            writer.append("lines:" + linesNum + "\n");
        }
        return linesNum;
    }

    /**
     * Computes case-insensitive word frequencies and appends the ten most
     * frequent words to the report as {@code <word>:<count>} lines, ordered by
     * descending count and then by ascending word.
     *
     * @param inputFile  path of the file to analyse
     * @param outputFile path of the report file (appended to)
     * @return the last word written (the lowest-ranked of the reported words),
     *         or {@code null} when the input contains no word
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws IOException           on any other read/write failure
     */
    public static String WordsNumSort(String inputFile, String outputFile) throws IOException {
        Map<String, Integer> frequencies = new HashMap<>();
        String lastWritten = null;
        try (Reader reader = openInput(inputFile);
             Writer writer = OutputFile(outputFile)) {
            String token;
            while ((token = nextToken(reader)) != null) {
                if (WORD_PATTERN.matcher(token).matches()) {
                    frequencies.merge(token.toLowerCase(Locale.ROOT), 1, Integer::sum);
                }
            }

            List<Map.Entry<String, Integer>> ranked = new ArrayList<>(frequencies.entrySet());
            ranked.sort((left, right) -> {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            });

            int limit = Math.min(TOP_WORDS_LIMIT, ranked.size());
            for (int i = 0; i < limit; i++) {
                Map.Entry<String, Integer> entry = ranked.get(i);
                writer.write(entry.getKey() + ":" + entry.getValue() + "\n");
                lastWritten = entry.getKey();
            }
        }
        return lastWritten;
    }

    /** Opens the input via {@link #InputFile(String)}, turning its {@code null} result into an exception. */
    private static Reader openInput(String fileName) throws FileNotFoundException {
        Reader reader = InputFile(fileName);
        if (reader == null) {
            throw new FileNotFoundException("Input file not found: " + fileName);
        }
        return reader;
    }

    /** Returns the next maximal run of ASCII letters/digits, or {@code null} at end of input. */
    private static String nextToken(Reader reader) throws IOException {
        StringBuilder token = new StringBuilder();
        int ch;
        while ((ch = reader.read()) != -1) {
            if (isAsciiAlphanumeric(ch)) {
                token.append((char) ch);
            } else if (token.length() > 0) {
                return token.toString();
            }
        }
        return token.length() > 0 ? token.toString() : null;
    }

    /** Tells whether {@code ch} is one of {@code a-z}, {@code A-Z} or {@code 0-9}. */
    private static boolean isAsciiAlphanumeric(int ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9');
    }
}