greenplum--数据导入和导出

拜拜、爱过 提交于 2020-02-28 04:53:51

insert

使用insert语句只适合加载少量的数据。

insert into tablename values (val1,val2,...);
insert into tablename (...) select ... from tabname;

copy

copy命令可以在文件和表之间导入、导出数据。但在gp中,copy的数据都需要经过master节点,无法实现各个segment节点并行高效地导入和导出数据。
使用copy命令的语法如下:

--\h command :可以获取命令的语法
postgres=# \h copy  
Command:     COPY
Description: copy data between a file and a table
Syntax:
--将文件数据导入表,数据是追加到表中的
COPY table [(column [, ...])] FROM {'file' | STDIN}
     [ [WITH] 
       [OIDS]  
       [HEADER]  
       [DELIMITER [ AS ] 'delimiter']  
       [NULL [ AS ] 'null string']
       [ESCAPE [ AS ] 'escape' | 'OFF']
       [NEWLINE [ AS ] 'LF' | 'CR' | 'CRLF']
       [CSV [QUOTE [ AS ] 'quote'] 
            [FORCE NOT NULL column [, ...]]
       [FILL MISSING FIELDS]
     [ [LOG ERRORS INTO error_table] [KEEP] 
       SEGMENT REJECT LIMIT count [ROWS | PERCENT] ]
 
 --将表中数据导出到文件中
COPY {table [(column [, ...])] | (query)} TO {'file' | STDOUT}
      [ [WITH] 
        [OIDS]
        [HEADER]
        [DELIMITER [ AS ] 'delimiter']
        [NULL [ AS ] 'null string']
        [ESCAPE [ AS ] 'escape' | 'OFF']
        [CSV [QUOTE [ AS ] 'quote'] 
             [FORCE QUOTE column [, ...]] ]

外部表

greenplum支持数据的并发加载,gpfdist是并发加载的工具,使用的是外部表。
gpfdist的实现架构图如下:
在这里插入图片描述
gpfdist的工作流程:

  1. 启动gpfdist,并在Master上建表,定义外部表的元数据信息。
  2. 将外部表中的数据插入(insert into ... select)到一张gp的物理表中,开始导入数据。
  3. Segment根据建表时定义的gpfdist url个数,启动相同数量的并发连接到gpfdist获取数据。
  4. Segment连接到gpfdist,gpfdist接收到请求后开始读取文件,顺序读取文件,然后将文件拆分成多个块,随机抛给Segment。

外部表是一种数据存放在数据库之外的数据文件中的表,表定义只是指向这些外部数据文件。可以像访问普通表一样用SQL访问外部表(可读外部表只支持SELECT,可写外部表只支持INSERT),读取数据时,数据库就从数据文件中加载数据。

外部表分为可读和可写外部表,可读外部表用于数据装载、可写外部表用于数据卸载。

创建外部表的语法:

CREATE [READABLE] EXTERNAL TABLE table_name 
     ( column_name data_type [, ...] | LIKE other_table )
      LOCATION ('file://seghost[:port]/path/file' [, ...])
        | ('gpfdist://filehost[:port]/file_pattern[#transform]'
        | ('gpfdists://filehost[:port]/file_pattern[#transform]'
            [, ...])
        | ('gphdfs://hdfs_host[:port]/path/file')
      FORMAT 'TEXT' 
            [( [HEADER]
               [DELIMITER [AS] 'delimiter' | 'OFF']
               [NULL [AS] 'null string']
               [ESCAPE [AS] 'escape' | 'OFF']
               [NEWLINE [ AS ] 'LF' | 'CR' | 'CRLF']
               [FILL MISSING FIELDS] )]
           | 'CSV'
            [( [HEADER]
               [QUOTE [AS] 'quote'] 
               [DELIMITER [AS] 'delimiter']
               [NULL [AS] 'null string']
               [FORCE NOT NULL column [, ...]]
               [ESCAPE [AS] 'escape']
               [NEWLINE [ AS ] 'LF' | 'CR' | 'CRLF']
               [FILL MISSING FIELDS] )]
           | 'CUSTOM' (Formatter=<formatter specifications>)
     [ ENCODING 'encoding' ]
     [ [LOG ERRORS INTO error_table] SEGMENT REJECT LIMIT count 
       [ROWS | PERCENT] ]
 ---------------------------------------------------------------      
CREATE [READABLE] EXTERNAL WEB TABLE table_name 
     ( column_name data_type [, ...] | LIKE other_table )
      LOCATION ('http://webhost[:port]/path/file' [, ...])
    | EXECUTE 'command' [ON ALL 
                          | MASTER
                          | number_of_segments 
                          | HOST ['segment_hostname'] 
                          | SEGMENT segment_id ]
      FORMAT 'TEXT' 
            [( [HEADER]
               [DELIMITER [AS] 'delimiter' | 'OFF']
               [NULL [AS] 'null string']
               [ESCAPE [AS] 'escape' | 'OFF']
               [NEWLINE [ AS ] 'LF' | 'CR' | 'CRLF']
               [FILL MISSING FIELDS] )]
           | 'CSV'
            [( [HEADER]
               [QUOTE [AS] 'quote'] 
               [DELIMITER [AS] 'delimiter']
               [NULL [AS] 'null string']
               [FORCE NOT NULL column [, ...]]
               [ESCAPE [AS] 'escape']
               [NEWLINE [ AS ] 'LF' | 'CR' | 'CRLF']
               [FILL MISSING FIELDS] )]
           | 'CUSTOM' (Formatter=<formatter specifications>)
     [ ENCODING 'encoding' ]
     [ [LOG ERRORS INTO error_table] SEGMENT REJECT LIMIT count 
       [ROWS | PERCENT] ]
------------------------------------------------------------------
CREATE WRITABLE EXTERNAL TABLE table_name
    ( column_name data_type [, ...] | LIKE other_table )
     LOCATION('gpfdist://outputhost[:port]/filename[#transform]'
      | ('gpfdists://outputhost[:port]/file_pattern[#transform]'
          [, ...])
      | ('gphdfs://hdfs_host[:port]/path')
      FORMAT 'TEXT' 
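               [( [DELIMITER [AS] 'delimiter']
               [NULL [AS] 'null string']
               [ESCAPE [AS] 'escape' | 'OFF'] )]
          | 'CSV'
               [( [QUOTE [AS] 'quote'] 
               [DELIMITER [AS] 'delimiter']
               [NULL [AS] 'null string']
               [FORCE QUOTE column [, ...]]
               [ESCAPE [AS] 'escape'] )]
          | 'CUSTOM' (Formatter=<formatter specifications>)
    [ ENCODING 'write_encoding' ]
    [ DISTRIBUTED BY (column [, ...]) | DISTRIBUTED RANDOMLY ]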

外部表的优势在于支持从各个segment上并发地通过gpfdist导入数据,效率高。

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!