Skip to content

Commit 9f85287

Browse files
committed
adding comments
1 parent e0ba20d commit 9f85287

7 files changed

Lines changed: 259 additions & 105 deletions

File tree

src/main/java/com/google/code/externalsorting/BinaryFileBuffer.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,41 @@
99
*
1010
*/
1111
public final class BinaryFileBuffer implements IOStringStack {
12+
/**
13+
* Constructs a BinaryFileBuffer wrapping the given BufferedReader.
14+
* @param r the BufferedReader to wrap
15+
* @throws IOException if an I/O error occurs
16+
*/
1217
public BinaryFileBuffer(BufferedReader r) throws IOException {
1318
this.fbr = r;
1419
reload();
1520
}
21+
/**
22+
* Closes the underlying BufferedReader.
23+
* @throws IOException if an I/O error occurs
24+
*/
1625
public void close() throws IOException {
1726
this.fbr.close();
1827
}
19-
28+
/**
29+
* Checks if the buffer is empty.
30+
* @return true if there are no more lines to read
31+
*/
2032
public boolean empty() {
2133
return this.cache == null;
2234
}
23-
35+
/**
36+
* Returns the next line in the buffer without removing it.
37+
* @return the next line as a String, or null if empty
38+
*/
2439
public String peek() {
2540
return this.cache;
2641
}
27-
42+
/**
43+
* Removes and returns the next line in the buffer.
44+
* @return the next line as a String
45+
* @throws IOException if an I/O error occurs
46+
*/
2847
public String pop() throws IOException {
2948
String answer = peek().toString();// make a copy
3049
reload();

src/main/java/com/google/code/externalsorting/ExternalSort.java

Lines changed: 51 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package com.google.code.externalsorting;
22

3-
// filename: ExternalSort.java
43
import java.io.BufferedReader;
54
import java.io.BufferedWriter;
65
import java.io.EOFException;
@@ -48,100 +47,80 @@
4847
* http://lemire.me/blog/archives/2010/04/01/external-memory-sorting-in-java/
4948
*/
5049
public class ExternalSort {
51-
52-
50+
/**
51+
* Default constructor for ExternalSort.
52+
* No initialization required.
53+
*/
54+
public ExternalSort() {
55+
}
56+
/**
57+
* Prints the program's usage message to standard output.
58+
*/
5359
private static void displayUsage() {
54-
System.out
55-
.println("java com.google.externalsorting.ExternalSort inputfile outputfile");
60+
System.out.println("java com.google.externalsorting.ExternalSort inputfile outputfile");
5661
System.out.println("Flags are:");
5762
System.out.println("-v or --verbose: verbose output");
5863
System.out.println("-d or --distinct: prune duplicate lines");
59-
System.out
60-
.println("-t or --maxtmpfiles (followed by an integer): specify an upper bound on the number of temporary files");
61-
System.out
62-
.println("-c or --charset (followed by a charset code): specify the character set to use (for sorting)");
63-
System.out
64-
.println("-z or --gzip: use compression for the temporary files");
65-
System.out
66-
.println("-H or --header (followed by an integer): ignore the first few lines");
67-
System.out
68-
.println("-s or --store (following by a path): where to store the temporary files");
64+
System.out.println("-t or --maxtmpfiles (followed by an integer): specify an upper bound on the number of temporary files");
65+
System.out.println("-c or --charset (followed by a charset code): specify the character set to use (for sorting)");
66+
System.out.println("-z or --gzip: use compression for the temporary files");
67+
System.out.println("-H or --header (followed by an integer): ignore the first few lines");
68+
System.out.println("-s or --store (following by a path): where to store the temporary files");
6969
System.out.println("-h or --help: display this message");
7070
}
7171

7272
/**
73-
* This method calls the garbage collector and then returns the free
74-
* memory. This avoids problems with applications where the GC hasn't
75-
* reclaimed memory and reports no available memory.
76-
*
77-
* @return available memory
73+
* Requests a garbage collection, then returns an estimate of the available memory (the JVM's maximum memory minus the memory currently allocated).
74+
* @return available memory in bytes
7875
*/
7976
public static long estimateAvailableMemory() {
80-
System.gc();
81-
// http://stackoverflow.com/questions/12807797/java-get-available-memory
82-
Runtime r = Runtime.getRuntime();
83-
long allocatedMemory = r.totalMemory() - r.freeMemory();
84-
long presFreeMemory = r.maxMemory() - allocatedMemory;
85-
return presFreeMemory;
77+
System.gc();
78+
Runtime r = Runtime.getRuntime();
79+
long allocatedMemory = r.totalMemory() - r.freeMemory();
80+
long presFreeMemory = r.maxMemory() - allocatedMemory;
81+
return presFreeMemory;
8682
}
8783

8884
/**
89-
* we divide the file into small blocks. If the blocks are too small, we
90-
* shall create too many temporary files. If they are too big, we shall
91-
* be using too much memory.
92-
*
93-
* @param sizeoffile how much data (in bytes) can we expect
94-
* @param maxtmpfiles how many temporary files can we create (e.g., 1024)
95-
* @param maxMemory Maximum memory to use (in bytes)
96-
* @return the estimate
85+
* Estimates the size of the blocks the file is divided into for the external sort.
86+
* @param sizeoffile size of the file in bytes
87+
* @param maxtmpfiles maximum number of temporary files
88+
* @param maxMemory maximum memory to use (in bytes)
89+
* @return the estimated block size
9790
*/
98-
public static long estimateBestSizeOfBlocks(final long sizeoffile,
99-
final int maxtmpfiles, final long maxMemory) {
100-
// we don't want to open up much more than maxtmpfiles temporary
101-
// files, better run
102-
// out of memory first.
103-
long blocksize = sizeoffile / maxtmpfiles
104-
+ (sizeoffile % maxtmpfiles == 0 ? 0 : 1);
105-
106-
// on the other hand, we don't want to create many temporary
107-
// files
108-
// for naught. If blocksize is smaller than half the free
109-
// memory, grow it.
91+
public static long estimateBestSizeOfBlocks(final long sizeoffile, final int maxtmpfiles, final long maxMemory) {
92+
long blocksize = sizeoffile / maxtmpfiles + (sizeoffile % maxtmpfiles == 0 ? 0 : 1);
11093
if (blocksize < maxMemory / 2) {
11194
blocksize = maxMemory / 2;
11295
}
11396
return blocksize;
11497
}
11598

11699
/**
117-
* @param args command line argument
118-
* @throws IOException generic IO exception
100+
* Main entry point for the external sorting program.
101+
* @param args command line arguments
102+
* @throws IOException if an I/O error occurs
119103
*/
120104
public static void main(final String[] args) throws IOException {
121-
boolean verbose = false;
122-
boolean distinct = false;
123-
int maxtmpfiles = DEFAULTMAXTEMPFILES;
124-
Charset cs = Charset.defaultCharset();
125-
String inputfile = null, outputfile = null;
126-
File tempFileStore = null;
127-
boolean usegzip = false;
128-
boolean parallel = true;
129-
int headersize = 0;
130-
for (int param = 0; param < args.length; ++param) {
131-
if (args[param].equals("-v")
132-
|| args[param].equals("--verbose")) {
133-
verbose = true;
134-
} else if ((args[param].equals("-h") || args[param]
135-
.equals("--help"))) {
136-
displayUsage();
137-
return;
138-
} else if ((args[param].equals("-d") || args[param]
139-
.equals("--distinct"))) {
140-
distinct = true;
141-
} else if ((args[param].equals("-t") || args[param]
142-
.equals("--maxtmpfiles"))
143-
&& args.length > param + 1) {
144-
param++;
105+
boolean verbose = false;
106+
boolean distinct = false;
107+
int maxtmpfiles = DEFAULTMAXTEMPFILES;
108+
Charset cs = Charset.defaultCharset();
109+
String inputfile = null, outputfile = null;
110+
File tempFileStore = null;
111+
boolean usegzip = false;
112+
boolean parallel = true;
113+
int headersize = 0;
114+
for (int param = 0; param < args.length; ++param) {
115+
if (args[param].equals("-v") || args[param].equals("--verbose")) {
116+
verbose = true;
117+
} else if ((args[param].equals("-h") || args[param].equals("--help"))) {
118+
displayUsage();
119+
return;
120+
} else if ((args[param].equals("-d") || args[param].equals("--distinct"))) {
121+
distinct = true;
122+
} else if ((args[param].equals("-t") || args[param].equals("--maxtmpfiles")) && args.length > param + 1) {
123+
param++;
145124
maxtmpfiles = Integer.parseInt(args[param]);
146125
if (maxtmpfiles < 0) {
147126
System.err

src/main/java/com/google/code/externalsorting/IOStringStack.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,29 @@
77
* so that users of the library can roll their own.
88
*/
99
public interface IOStringStack {
10+
/**
11+
* Closes the underlying resource.
12+
* @throws IOException if an I/O error occurs
13+
*/
1014
public void close() throws IOException;
1115

16+
/**
17+
* Checks if the stack is empty.
18+
* @return true if empty, false otherwise
19+
*/
1220
public boolean empty();
1321

22+
/**
23+
* Returns the next element without removing it.
24+
* @return the next element as a String
25+
*/
1426
public String peek();
1527

28+
/**
29+
* Removes and returns the next element.
30+
* @return the next element as a String
31+
* @throws IOException if an I/O error occurs
32+
*/
1633
public String pop() throws IOException;
1734

1835
}

src/main/java/com/google/code/externalsorting/csv/CSVRecordBuffer.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
import org.apache.commons.csv.CSVParser;
77
import org.apache.commons.csv.CSVRecord;
88

9+
/**
10+
* Buffer wrapper for CSVRecord iteration and management.
11+
* Handles reading and closing of CSVParser resources.
12+
*/
913
public class CSVRecordBuffer {
1014

1115
private Iterator<CSVRecord> iterator;
@@ -14,25 +18,48 @@ public class CSVRecordBuffer {
1418

1519
private CSVRecord cache;
1620

21+
/**
22+
* Constructs a CSVRecordBuffer wrapping the given CSVParser.
23+
* @param parser the CSVParser to wrap
24+
* @throws IOException if an I/O error occurs
25+
* @throws ClassNotFoundException if a class cannot be found
26+
*/
1727
public CSVRecordBuffer(CSVParser parser) throws IOException, ClassNotFoundException {
1828
this.iterator = parser.iterator();
1929
this.parser = parser;
2030
reload();
2131
}
2232

33+
/**
34+
* Closes the underlying CSVParser.
35+
* @throws IOException if an I/O error occurs
36+
*/
2337
public void close() throws IOException {
2438
this.parser.close();
2539
}
2640

41+
/**
42+
* Checks if the buffer is empty.
43+
* @return true if there are no more records to read
44+
*/
2745
public boolean empty() {
2846
return this.cache == null;
2947
}
3048

49+
/**
50+
* Returns the next CSVRecord in the buffer without removing it.
51+
* @return the next CSVRecord, or null if empty
52+
*/
3153
public CSVRecord peek() {
3254
return this.cache;
3355
}
3456

35-
//
57+
/**
58+
* Removes and returns the next CSVRecord in the buffer.
59+
* @return the next CSVRecord
60+
* @throws IOException if an I/O error occurs
61+
* @throws ClassNotFoundException if a class cannot be found
62+
*/
3663
public CSVRecord pop() throws IOException, ClassNotFoundException {
3764
CSVRecord answer = peek();// make a copy
3865
reload();

0 commit comments

Comments
 (0)