Hu guys,
I\'m new to python/anaconda/jupyter/numPy, panda, etc.... so please excuse me if it\'s a really stupid question. I\'m trying to obtain MNIST database by using a
Late to the party, but i had the same error and my simple solution was to run the two commands separately, like:
from sklearn import datasets
and make sure you run this in a separate line in jupyter notebook
mnist_data = datasets.fetch_mldata('MNIST original', data_home = 'datasets/')
I found this solution on https://github.com/ageron/handson-ml/issues/7 and this one was most useful for me. Just download the file from https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat
after that use this script:
from scipy.io import loadmat
mnist_path = "my/local/path/mnist-original.mat" #type the directory where you want to the file is located
mnist_raw = loadmat(mnist_path)
mnist = {
"data": mnist_raw["data"].T,
"target": mnist_raw["label"][0],
"COL_NAMES": ["label", "data"],
"DESCR": "mldata.org dataset: mnist-original",
}
print("Success!")
from sklearn.datasets import fetch_mldata
try:
mnist = fetch_mldata('MNIST original')
except Exception as ex:
from six.moves import urllib
from scipy.io import loadmat
import os
mnist_path = os.path.join(".", "datasets", "mnist-original.mat")
# download dataset from github.
mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
response = urllib.request.urlopen(mnist_alternative_url)
with open(mnist_path, "wb") as f:
content = response.read()
f.write(content)
mnist_raw = loadmat(mnist_path)
mnist = {
"data": mnist_raw["data"].T,
"target": mnist_raw["label"][0],
"COL_NAMES": ["label", "data"],
"DESCR": "mldata.org dataset: mnist-original",
}
print("Done!")
Found a good solution here: https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py
It downloads dataset from Yan LeCun's website (http://yann.lecun.com/exdb/mnist/).
import os
from urllib import urlretrieve
def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
print("Downloading %s" % filename)
urlretrieve(source + filename, filename)
# We then define functions for loading MNIST images and labels.
# For convenience, they also download the requested files if needed.
import gzip
def load_mnist_images(filename):
if not os.path.exists(filename):
download(filename)
# Read the inputs in Yann LeCun's binary format.
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=16)
# The inputs are vectors now, we reshape them to monochrome 2D images,
# following the shape convention: (examples, channels, rows, columns)
data = data.reshape(-1, 1, 28, 28)
# The inputs come as bytes, we convert them to float32 in range [0,1].
# (Actually to range [0, 255/256], for compatibility to the version
# provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
return data / np.float32(256)
def load_mnist_labels(filename):
if not os.path.exists(filename):
download(filename)
# Read the labels in Yann LeCun's binary format.
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=8)
# The labels are vectors of integers now, that's exactly what we want.
return data
X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
I also had the same error and had to turn off the firewall. On the Macbook, go System Preferences > Security & Privacy > Firewall > Turn Off Firewall.
Here is an alternative location to download the MNIST dataset (referenced from https://github.com/ageron/handson-ml/blob/master/03_classification.ipynb)
from six.moves import urllib
from sklearn.datasets import fetch_mldata
try:
mnist = fetch_mldata('MNIST original')
except urllib.error.HTTPError as ex:
print("Could not download MNIST data from mldata.org, trying alternative...")
# Alternative method to load MNIST, if mldata.org is down
from scipy.io import loadmat
mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
mnist_path = "./mnist-original.mat"
response = urllib.request.urlopen(mnist_alternative_url)
with open(mnist_path, "wb") as f:
content = response.read()
f.write(content)
mnist_raw = loadmat(mnist_path)
mnist = {
"data": mnist_raw["data"].T,
"target": mnist_raw["label"][0],
"COL_NAMES": ["label", "data"],
"DESCR": "mldata.org dataset: mnist-original",
}
print("Success!")