问题
I'm working on a pair of classes that queue up a large quantity of objects, and then allow for that queue to be iterated over, using Java Serialization/ ObjectOutputStream & ObjectInputStream. The quantity of data needed in the queue is in excess of 50GB, hence the need for file IO. The problem is that a file that takes 2ms to write takes ~2 seconds to read. I've tried BufferedInputStream, and each reader has a dedicated thread to keep the input buffer full.
private File file;
private FileInputStream fileStream;
private BufferedInputStream buffer;
private ObjectInputStream stream;
......
this.file = file;
this.fileStream = new FileInputStream(this.file);
this.buffer = new BufferedInputStream(this.fileStream, 2^20); //Tried multiple depths, does not change behavior
this.stream = new ObjectInputStream(this.buffer);
.....
long startTime = System.currentTimeMillis();
LinkedList<T> list = (LinkedList<T>) stream.readUnshared();
long endTime = System.currentTimeMillis();
totalReadTime += endTime - startTime;
The class that populates the serialized file is inserting LinkedLists with 10000 elements. When testing this with Integers the average time for the stream.readUnshared() call is 214.7ms. For comparison purposes the total write time for any given test is at most 3ms, where a test is usually writing ~10x LinkedList with 10,000 elements to a file.
I've included all of my source and my performance test program. Here's my sample output:
Hello
This
Is
A
Total time spent reading: 0
Final
Test
Average time spent reading: 0.0
Starting test: 0
Threads Created.
Total write time: 3
Total time spent reading: 2256
Average time spent reading: 225.6
Total read time: 2030
Threads Joined. Total time: 2265
Starting test: 1
Threads Created.
Total write time: 2
Total time spent reading: 2147
Average time spent reading: 214.7
Total read time: 1961
Threads Joined. Total time: 2153
Starting test: 2
Threads Created.
Total write time: 1
Total time spent reading: 2143
Average time spent reading: 214.3
Total read time: 1957
Threads Joined. Total time: 2149
I don't do this professionally and can take constructive feedback. Thank you for looking at it.
//Serial Object Writer Class
package utils;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.LinkedList;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
public class SerialObjFileCollector<T extends Serializable> extends Thread {
private ObjectOutputStream stream;
private FileOutputStream fileStream;
private File tempFile;
private AtomicBoolean isOpen;
private AtomicInteger flushCount;
private AtomicLong count;
private LinkedList<T> objects;
private int sendThreshold = 10000;
private ConcurrentLinkedQueue<LinkedList<T>> sendQueue;
public SerialObjFileCollector() {
this.initialize();
}
private void initialize() {
try {
tempFile = File.createTempFile("binary_data", ".SerialObjFileCollector");
tempFile.deleteOnExit();
fileStream = new FileOutputStream(tempFile);
stream = new ObjectOutputStream(fileStream);
isOpen = new AtomicBoolean(true);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
isOpen = new AtomicBoolean(false);
}
objects = new LinkedList<T>();
sendQueue = new ConcurrentLinkedQueue<LinkedList<T>>();
flushCount = new AtomicInteger(0);
count = new AtomicLong(0);
this.start();
}
public void writeObject(T object) {
if(!isOpen.compareAndExchange(false, false)) throw new UnsupportedOperationException("Cannot write objects to a closed SerialObjFileCollector!");
objects.add(object);
if(objects.size() > sendThreshold) {
this.sendQueue.add(objects);
synchronized (sendQueue) {sendQueue.notifyAll();}
objects = new LinkedList<T>();
}
}
public void writeObjects(Iterable<T> objects) {
for(T object : objects) {
this.writeObject(object);
}
}
public long size() {
return count.get();
}
public SerialObjFileReader<T> getReader() {
if(isOpen.compareAndExchange(true, true)) {
flush(true);
try {
fileStream.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
return new SerialObjFileReader<T>(tempFile,count.get(),sendThreshold);
}
public void flush() {
this.flush(false);
}
private void flush(boolean close) {
this.sendQueue.add(objects);
flushCount.incrementAndGet();
isOpen.set(!close);
objects = new LinkedList<T>();
try {
synchronized (sendQueue) {sendQueue.notifyAll();}
synchronized (flushCount) {flushCount.wait();}
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void run(){
//Keep the thread open even when closing and flushing
while(isOpen.compareAndExchange(true, true) || flushCount.get() > 0) {
if(sendQueue.isEmpty() && flushCount.get() == 0) try {
synchronized (sendQueue) {sendQueue.wait();}
} catch (InterruptedException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
//Write out the Queue
while(!this.sendQueue.isEmpty()) {
LinkedList<T> toSend = sendQueue.poll();
try {
stream.writeUnshared(toSend);
stream.reset();
count.addAndGet(toSend.size());
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
};
}
//Flush the streams
if(flushCount.get() > 0) {
try {
stream.flush();
fileStream.flush();
flushCount.decrementAndGet();
synchronized (flushCount) {flushCount.notifyAll();}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
synchronized (flushCount) {flushCount.notifyAll();}
}
}
//Serial Object Reader Class
package utils;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicLong;
public class SerialObjFileReader<T extends Serializable> implements Iterable<T> {
private File file;
private long count;
private int maxSize;
public SerialObjFileReader(File file, long count, int maxSize) {
this.initialize(file,count,maxSize);
}
private void initialize(File file, long count, int maxSize) {
this.file = file;
this.count = count;
this.maxSize = maxSize;
}
@Override
public Iterator<T> iterator() {
return new SerialObjFileIterator<T>(file,count,maxSize);
}
public long size() {
return count;
}
public static class SerialObjFileIterator<T> extends Thread implements Iterator<T> {
private File file;
private FileInputStream fileStream;
private BufferedInputStream buffer;
private ObjectInputStream stream;
private long count;
private int maxSize;
private AtomicLong dequeCount;
private Iterator<T> localIterator;
private ConcurrentLinkedQueue<Iterator<T>> rcvQueue;
private long totalReadTime = 0;
private int readCount = 0;
public SerialObjFileIterator(File file, long count, int maxSize) {
this.initialize(file,count,maxSize);
}
private void initialize(File file, long count, int maxSize) {
this.count = 0;
try {
this.file = file;
this.fileStream = new FileInputStream(this.file);
this.buffer = new BufferedInputStream(this.fileStream, 2^20);
this.stream = new ObjectInputStream(this.buffer);
this.count = count;
this.maxSize = maxSize;
this.dequeCount = new AtomicLong(count);
this.rcvQueue = new ConcurrentLinkedQueue<Iterator<T>>();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
if(this.count > 0) localIterator = readNewIterator();
this.start();
}
@Override
public boolean hasNext() {
return remaining() > 0 || (localIterator == null ? false : localIterator.hasNext()); //Probably some redundancy here
}
@Override
public T next() {
T result = localIterator.next();
count--;
if(!localIterator.hasNext() && count > 0) localIterator = pullNewIterator();
return result;
}
private Iterator<T> pullNewIterator() {
synchronized (rcvQueue) {rcvQueue.notifyAll();} //Wake the thread regardless
if(!rcvQueue.isEmpty()) {
return rcvQueue.poll();
} else if (count > 0){
try {
synchronized (dequeCount) {dequeCount.wait();}
return rcvQueue.poll();
} catch (InterruptedException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
}
return null;
}
private Iterator<T> readNewIterator() {
if(dequeCount.get() > 0) try {
long startTime = System.currentTimeMillis();
LinkedList<T> list = (LinkedList<T>) stream.readUnshared();
long endTime = System.currentTimeMillis();
totalReadTime += endTime - startTime;
readCount++;
dequeCount.set( Math.max(dequeCount.get() - maxSize, 0));
return list.iterator();
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
System.exit(1);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
System.exit(1);
}
return null;
}
public long remaining() {
return count;
}
public void run() {
while(dequeCount.get() > 0) {
while(rcvQueue.size() < 3 && dequeCount.get() > 0) {
rcvQueue.add(readNewIterator());
}
synchronized (dequeCount) {dequeCount.notifyAll();} //Wake the thread regardless
if(dequeCount.get() > 0) try {
synchronized (rcvQueue) {rcvQueue.wait();}
} catch (InterruptedException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
}
System.out.println("\t\t\tTotal time spent reading: " + totalReadTime);
System.out.println("\t\t\tAverage time spent reading: " + ((double) totalReadTime / ((double) readCount)));
}
}
}
//Serial Object Performance Test Class
package utils;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.concurrent.atomic.AtomicReference;
public class SerialObjTester {
public static void main(String[] args) {
SerialObjFileCollector<String> collector = new SerialObjFileCollector<String>();
collector.writeObject("Hello");
collector.writeObject("This");
collector.writeObject("Is");
collector.writeObject("A");
collector.writeObject("Final");
collector.writeObject("Test");
for(String str : collector.getReader()) {
System.out.println(str);
}
//Throughput Tests
final int LIST_SIZE = 100000;
final int WRITE_COUNT = 100000;
SerialObjFileCollector<LinkedList<Integer>> intCollector = new SerialObjFileCollector<LinkedList<Integer>>();
LinkedList<Integer> intList = new LinkedList<Integer>();
for(int count = 0; count < LIST_SIZE; count++) intList.add(count);
//Populate initial collector
for(int count = 0; count < WRITE_COUNT; count++) intCollector.writeObject(intList);
AtomicReference<SerialObjFileCollector<LinkedList<Integer>>> intCollectorRef = new AtomicReference<SerialObjFileCollector<LinkedList<Integer>>>(intCollector);
for(int testCount = 0; testCount < 100; testCount++) {
System.out.println("Starting test: " + testCount);
SerialObjFileReader<LinkedList<Integer>> intReader = intCollectorRef.get().getReader();
Thread readerThread = new Thread() {
public void run() {
Iterator<LinkedList<Integer>> intListIter = intReader.iterator();
LinkedList<Integer> list = null;
final long startTime = System.currentTimeMillis();
while(intListIter.hasNext()) {
list = intListIter.next();
}
final long endTime = System.currentTimeMillis();
int testInt = 0;
for(int compareInt : list) if(compareInt != testInt++) System.err.println("\t\tInteger List Inconsistency found!");
System.out.println("\tTotal read time: " + (endTime - startTime) );
}
};
Thread writerThread = new Thread() {
public SerialObjFileCollector<LinkedList<Integer>> localIntCollector;
public void run() {
localIntCollector = new SerialObjFileCollector<LinkedList<Integer>>();
final long startTime = System.currentTimeMillis();
for(int count = 0; count < WRITE_COUNT; count++) localIntCollector.writeObject(intList);
final long endTime = System.currentTimeMillis();
intCollectorRef.set(localIntCollector);
System.out.println("\tTotal write time: " + (endTime - startTime) );
}
};
System.out.println("\tThreads Created.");
final long startTime = System.currentTimeMillis();
readerThread.start();
writerThread.start();
try {
readerThread.join();
writerThread.join();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
final long endTime = System.currentTimeMillis();
System.out.println("\tThreads Joined. Total time: " + (endTime - startTime) );
}
}
}
来源:https://stackoverflow.com/questions/61720808/why-is-objectinputstream-readunshared-1000x-slower-than-objectoutputstream-wri