|
1 | 1 | package com.google.code.externalsorting; |
2 | 2 |
|
3 | | -// filename: ExternalSort.java |
4 | 3 | import java.io.BufferedReader; |
5 | 4 | import java.io.BufferedWriter; |
6 | 5 | import java.io.EOFException; |
|
48 | 47 | * http://lemire.me/blog/archives/2010/04/01/external-memory-sorting-in-java/ |
49 | 48 | */ |
50 | 49 | public class ExternalSort { |
51 | | - |
52 | | - |
/**
 * Creates an {@code ExternalSort} instance.
 *
 * <p>The constructor body is empty: it performs no initialization.
 */
public ExternalSort() {
}
| 56 | + /** |
| 57 | + * Affiche l'utilisation du programme sur la sortie standard. |
| 58 | + */ |
53 | 59 | private static void displayUsage() { |
54 | | - System.out |
55 | | - .println("java com.google.externalsorting.ExternalSort inputfile outputfile"); |
| 60 | + System.out.println("java com.google.externalsorting.ExternalSort inputfile outputfile"); |
56 | 61 | System.out.println("Flags are:"); |
57 | 62 | System.out.println("-v or --verbose: verbose output"); |
58 | 63 | System.out.println("-d or --distinct: prune duplicate lines"); |
59 | | - System.out |
60 | | - .println("-t or --maxtmpfiles (followed by an integer): specify an upper bound on the number of temporary files"); |
61 | | - System.out |
62 | | - .println("-c or --charset (followed by a charset code): specify the character set to use (for sorting)"); |
63 | | - System.out |
64 | | - .println("-z or --gzip: use compression for the temporary files"); |
65 | | - System.out |
66 | | - .println("-H or --header (followed by an integer): ignore the first few lines"); |
67 | | - System.out |
68 | | - .println("-s or --store (following by a path): where to store the temporary files"); |
| 64 | + System.out.println("-t or --maxtmpfiles (followed by an integer): specify an upper bound on the number of temporary files"); |
| 65 | + System.out.println("-c or --charset (followed by a charset code): specify the character set to use (for sorting)"); |
| 66 | + System.out.println("-z or --gzip: use compression for the temporary files"); |
| 67 | + System.out.println("-H or --header (followed by an integer): ignore the first few lines"); |
| 68 | + System.out.println("-s or --store (following by a path): where to store the temporary files"); |
69 | 69 | System.out.println("-h or --help: display this message"); |
70 | 70 | } |
71 | 71 |
|
72 | 72 | /** |
73 | | - * This method calls the garbage collector and then returns the free |
74 | | - * memory. This avoids problems with applications where the GC hasn't |
75 | | - * reclaimed memory and reports no available memory. |
76 | | - * |
77 | | - * @return available memory |
| 73 | + * Cette méthode appelle le garbage collector et retourne la mémoire libre. |
| 74 | + * @return mémoire disponible en octets |
78 | 75 | */ |
79 | 76 | public static long estimateAvailableMemory() { |
80 | | - System.gc(); |
81 | | - // http://stackoverflow.com/questions/12807797/java-get-available-memory |
82 | | - Runtime r = Runtime.getRuntime(); |
83 | | - long allocatedMemory = r.totalMemory() - r.freeMemory(); |
84 | | - long presFreeMemory = r.maxMemory() - allocatedMemory; |
85 | | - return presFreeMemory; |
| 77 | + System.gc(); |
| 78 | + Runtime r = Runtime.getRuntime(); |
| 79 | + long allocatedMemory = r.totalMemory() - r.freeMemory(); |
| 80 | + long presFreeMemory = r.maxMemory() - allocatedMemory; |
| 81 | + return presFreeMemory; |
86 | 82 | } |
87 | 83 |
|
88 | 84 | /** |
89 | | - * we divide the file into small blocks. If the blocks are too small, we |
90 | | - * shall create too many temporary files. If they are too big, we shall |
91 | | - * be using too much memory. |
92 | | - * |
93 | | - * @param sizeoffile how much data (in bytes) can we expect |
94 | | - * @param maxtmpfiles how many temporary files can we create (e.g., 1024) |
95 | | - * @param maxMemory Maximum memory to use (in bytes) |
96 | | - * @return the estimate |
| 85 | + * Calcule la taille optimale des blocs pour le tri externe. |
| 86 | + * @param sizeoffile taille du fichier en octets |
| 87 | + * @param maxtmpfiles nombre maximal de fichiers temporaires |
| 88 | + * @param maxMemory mémoire maximale à utiliser |
| 89 | + * @return estimation de la taille du bloc |
97 | 90 | */ |
98 | | - public static long estimateBestSizeOfBlocks(final long sizeoffile, |
99 | | - final int maxtmpfiles, final long maxMemory) { |
100 | | - // we don't want to open up much more than maxtmpfiles temporary |
101 | | - // files, better run |
102 | | - // out of memory first. |
103 | | - long blocksize = sizeoffile / maxtmpfiles |
104 | | - + (sizeoffile % maxtmpfiles == 0 ? 0 : 1); |
105 | | - |
106 | | - // on the other hand, we don't want to create many temporary |
107 | | - // files |
108 | | - // for naught. If blocksize is smaller than half the free |
109 | | - // memory, grow it. |
| 91 | + public static long estimateBestSizeOfBlocks(final long sizeoffile, final int maxtmpfiles, final long maxMemory) { |
| 92 | + long blocksize = sizeoffile / maxtmpfiles + (sizeoffile % maxtmpfiles == 0 ? 0 : 1); |
110 | 93 | if (blocksize < maxMemory / 2) { |
111 | 94 | blocksize = maxMemory / 2; |
112 | 95 | } |
113 | 96 | return blocksize; |
114 | 97 | } |
115 | 98 |
|
116 | 99 | /** |
117 | | - * @param args command line argument |
118 | | - * @throws IOException generic IO exception |
| 100 | + * Main entry point for the external sorting program. |
| 101 | + * @param args command line arguments |
| 102 | + * @throws IOException if an I/O error occurs |
119 | 103 | */ |
120 | 104 | public static void main(final String[] args) throws IOException { |
121 | | - boolean verbose = false; |
122 | | - boolean distinct = false; |
123 | | - int maxtmpfiles = DEFAULTMAXTEMPFILES; |
124 | | - Charset cs = Charset.defaultCharset(); |
125 | | - String inputfile = null, outputfile = null; |
126 | | - File tempFileStore = null; |
127 | | - boolean usegzip = false; |
128 | | - boolean parallel = true; |
129 | | - int headersize = 0; |
130 | | - for (int param = 0; param < args.length; ++param) { |
131 | | - if (args[param].equals("-v") |
132 | | - || args[param].equals("--verbose")) { |
133 | | - verbose = true; |
134 | | - } else if ((args[param].equals("-h") || args[param] |
135 | | - .equals("--help"))) { |
136 | | - displayUsage(); |
137 | | - return; |
138 | | - } else if ((args[param].equals("-d") || args[param] |
139 | | - .equals("--distinct"))) { |
140 | | - distinct = true; |
141 | | - } else if ((args[param].equals("-t") || args[param] |
142 | | - .equals("--maxtmpfiles")) |
143 | | - && args.length > param + 1) { |
144 | | - param++; |
| 105 | + boolean verbose = false; |
| 106 | + boolean distinct = false; |
| 107 | + int maxtmpfiles = DEFAULTMAXTEMPFILES; |
| 108 | + Charset cs = Charset.defaultCharset(); |
| 109 | + String inputfile = null, outputfile = null; |
| 110 | + File tempFileStore = null; |
| 111 | + boolean usegzip = false; |
| 112 | + boolean parallel = true; |
| 113 | + int headersize = 0; |
| 114 | + for (int param = 0; param < args.length; ++param) { |
| 115 | + if (args[param].equals("-v") || args[param].equals("--verbose")) { |
| 116 | + verbose = true; |
| 117 | + } else if ((args[param].equals("-h") || args[param].equals("--help"))) { |
| 118 | + displayUsage(); |
| 119 | + return; |
| 120 | + } else if ((args[param].equals("-d") || args[param].equals("--distinct"))) { |
| 121 | + distinct = true; |
| 122 | + } else if ((args[param].equals("-t") || args[param].equals("--maxtmpfiles")) && args.length > param + 1) { |
| 123 | + param++; |
145 | 124 | maxtmpfiles = Integer.parseInt(args[param]); |
146 | 125 | if (maxtmpfiles < 0) { |
147 | 126 | System.err |
|
0 commit comments