How to open and convert sqlite database to pandas dataframe

后端 未结 5 1526
北海茫月
北海茫月 2020-12-12 19:09

I have downloaded some datas as a sqlite database (data.db) and I want to open this database in python and then convert it into pandas dataframe.

This is so far I ha

相关标签:
5条回答
  • 2020-12-12 19:33

    Search sqlalchemy, engine and database name in google (sqlite in this case):

    import pandas as pd
    import sqlalchemy
    
    db_name = "data.db"
    table_name = "LITTLE_BOBBY_TABLES"
    
    engine = sqlalchemy.create_engine("sqlite:///%s" % db_name, execution_options={"sqlite_raw_colnames": True})
    df = pd.read_sql_table(table_name, engine)
    
    0 讨论(0)
  • 2020-12-12 19:37

    i have stored my data in database.sqlite table name is Reviews

    import sqlite3
    con=sqlite3.connect("database.sqlite")
    
    data=pd.read_sql_query("SELECT * FROM Reviews",con)
    print(data)
    
    0 讨论(0)
  • 2020-12-12 19:39

    The line

    data = sqlite3.connect('data.db')
    

    opens a connection to the database. There are no records queried up to this. So you have to execute a query afterward and provide this to the pandas DataFrame constructor.

    It should look similar to this

    import sqlite3
    import pandas as pd
    
    dat = sqlite3.connect('data.db')
    query = dat.execute("SELECT * From <TABLENAME>")
    cols = [column[0] for column in query.description]
    results= pd.DataFrame.from_records(data = query.fetchall(), columns = cols)
    

    I am not really firm with SQL commands, so you should check the correctness of the query. should be the name of the table in your database.

    0 讨论(0)
  • 2020-12-12 19:53

    I wrote a piece of code up that saves tables in a database file such as .sqlite or .db and creates an excel file out of it with each table as a sheet or makes individual tables into csvs.

    Note: You don't need to know the table names in advance!

    import os, fnmatch
    import sqlite3
    import pandas as pd
    
    #creates a directory without throwing an error
    def create_dir(dir):
      if not os.path.exists(dir):
        os.makedirs(dir)
        print("Created Directory : ", dir)
      else:
        print("Directory already existed : ", dir)
      return dir
    
    #finds files in a directory corresponding to a regex query
    def find(pattern, path):
        result = []
        for root, dirs, files in os.walk(path):
            for name in files:
                if fnmatch.fnmatch(name, pattern):
                    result.append(os.path.join(root, name))
        return result
    
    
    
    #convert sqlite databases(.db,.sqlite) to pandas dataframe(excel with each table as a different sheet or individual csv sheets)
    def save_db(dbpath=None,excel_path=None,csv_path=None,extension="*.sqlite",csvs=True,excels=True):
        if (excels==False and csvs==False):
          print("Atleast one of the parameters need to be true: csvs or excels")
          return -1
    
        #little code to find files by extension
        if dbpath==None:
          files=find(extension,os.getcwd())
          if len(files)>1:
            print("Multiple files found! Selecting the first one found!")
            print("To locate your file, set dbpath=<yourpath>")
          dbpath = find(extension,os.getcwd())[0] if dbpath==None else dbpath
          print("Reading database file from location :",dbpath)
    
        #path handling
    
        external_folder,base_name=os.path.split(os.path.abspath(dbpath))
        file_name=os.path.splitext(base_name)[0] #firstname without .
        exten=os.path.splitext(base_name)[-1]   #.file_extension
    
        internal_folder="Saved_Dataframes_"+file_name
        main_path=os.path.join(external_folder,internal_folder)
        create_dir(main_path)
    
    
        excel_path=os.path.join(main_path,"Excel_Multiple_Sheets.xlsx") if excel_path==None else excel_path
        csv_path=main_path if csv_path==None else csv_path
    
        db = sqlite3.connect(dbpath)
        cursor = db.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()
        print(len(tables),"Tables found :")
    
        if excels==True:
          #for writing to excel(xlsx) we will be needing this!
          try:
            import XlsxWriter
          except ModuleNotFoundError:
            !pip install XlsxWriter
    
        if (excels==True and csvs==True):
          writer = pd.ExcelWriter(excel_path, engine='xlsxwriter')
          i=0
          for table_name in tables:
              table_name = table_name[0]
              table = pd.read_sql_query("SELECT * from %s" % table_name, db)
              i+=1
              print("Parsing Excel Sheet ",i," : ",table_name)
              table.to_excel(writer, sheet_name=table_name, index=False)
              print("Parsing CSV File ",i," : ",table_name)
              table.to_csv(os.path.join(csv_path,table_name + '.csv'), index_label='index')
    
          writer.save()
    
    
        elif excels==True:
          writer = pd.ExcelWriter(excel_path, engine='xlsxwriter')
          i=0
          for table_name in tables:
              table_name = table_name[0]
              table = pd.read_sql_query("SELECT * from %s" % table_name, db)
              i+=1
              print("Parsing Excel Sheet ",i," : ",table_name)
              table.to_excel(writer, sheet_name=table_name, index=False)
    
          writer.save()
    
        elif csvs==True:
          i=0
          for table_name in tables:
              table_name = table_name[0]
              table = pd.read_sql_query("SELECT * from %s" % table_name, db)
              i+=1
              print("Parsing CSV File ",i," : ",table_name)
              table.to_csv(os.path.join(csv_path,table_name + '.csv'), index_label='index')
        cursor.close()
        db.close()
        return 0
    save_db(); 
    
    0 讨论(0)
  • 2020-12-12 19:54

    Despite sqlite being part of the Python Standard Library and is a nice and easy interface to SQLite databases, the Pandas tutorial states:

    Note In order to use read_sql_table(), you must have the SQLAlchemy optional dependency installed. http://pandas.pydata.org/pandas-docs/stable/io.html#reading-tables

    But Pandas still supports sqlite3 access if you want to avoid installing SQLAlchemy:

    import sqlite3
    import pandas as pd
    # Create your connection.
    cnx = sqlite3.connect('file.db')
    
    df = pd.read_sql_query("SELECT * FROM table_name", cnx)
    

    But you need to know the name of the used table in advance.

    Hope it helps!

    http://pandas.pydata.org/pandas-docs/stable/io.html#sqlite-fallback

    0 讨论(0)
提交回复
热议问题