问题
I have received a workbook that contains two tables in Power Pivot (one with around 1 million rows, the other with around 20 million rows). I would like to extract the data (in any format really, but let's say CSV) so that I can use it in R and PostgreSQL.
I can't export to an Excel table because there are more than 1 million rows, and copy-pasting the data only works when I select around 200,000 rows at a time. So I'm a bit stuck! I tried converting the xlsx into a zip and opening the "item.data" file in Notepad++, but its contents appear to be encrypted or encoded in some way.
I would appreciate any solution (happy to use VBA, Python, SQL)
Edit: I put together some VBA which works OK for around 0.5 million rows but breaks on the 17-million-row document:
Public Sub CreatePowerPivotDmvInventory()
    ' Entry point: exports one table of the active workbook's Data Model to CSV.
    '
    ' Initializes the workbook's Data Model, obtains its ADO connection and
    ' hands it to WriteDmvContent together with the name of the table to dump.
    ' Any run-time error is reported to the user through a message box.
    '
    ' Uses early-bound ADODB types, so the project needs a reference to the
    ' Microsoft ActiveX Data Objects library.
    Dim conn As ADODB.Connection
    Dim wbTarget As Workbook

    On Error GoTo FailureOutput

    Set wbTarget = ActiveWorkbook
    wbTarget.Model.Initialize
    Set conn = wbTarget.Model.DataModelConnection.ModelConnection.ADOConnection

    ' Pass the name of the model table to export (it is placed after
    ' EVALUATE by WriteDmvContent), e.g. Partners.
    WriteDmvContent "Partners", conn

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    MsgBox Err.Description
End Sub
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
    ' Runs the query "EVALUATE <dmvName>" on conn and writes the result to
    ' H:\output_table_<dmvName>.csv using Write # (one header line with the
    ' field names, then one line per record).
    '
    ' Parameters:
    '   dmvName - table expression placed after EVALUATE; also used in the
    '             output file name.
    '   conn    - open ADO connection to the workbook's Data Model.
    '
    ' Errors: the output file and the recordset are always released, then the
    ' original error is re-raised so the caller's handler can report it.
    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Long
    Dim lastField As Long
    Dim myFile As String
    Dim fileNum As Integer
    Dim fileIsOpen As Boolean
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    ' Without this statement the FailureOutput label below is never reached
    ' and a failure mid-export would leave the file handle open.
    On Error GoTo FailureOutput

    mdx = "EVALUATE " & dmvName

    Set rs = New ADODB.Recordset
    rs.ActiveConnection = conn
    rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

    ' Setup CSV file. FreeFile avoids clashing with a handle that is already
    ' in use elsewhere in the session.
    myFile = "H:\output_table_" & dmvName & ".csv"
    fileNum = FreeFile
    Open myFile For Output As #fileNum
    fileIsOpen = True

    lastField = rs.Fields.Count - 1

    ' Output column names. A trailing comma keeps the next value on the same
    ' line; the last field is written without it to end the line.
    For i = 0 To lastField
        If i = lastField Then
            Write #fileNum, rs.Fields(i).Name
        Else
            Write #fileNum, rs.Fields(i).Name,
        End If
    Next i

    ' Output of the query results
    Do Until rs.EOF
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNum, rs.Fields(i).Value
            Else
                Write #fileNum, rs.Fields(i).Value,
            End If
        Next i
        rs.MoveNext
    Loop

    Close #fileNum
    fileIsOpen = False

    rs.Close
    Set rs = Nothing
    Exit Sub

FailureOutput:
    ' Remember the error, then leave the handler via Resume so that the
    ' clean-up code can run with its own error trapping.
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Resume CleanUpAfterFailure

CleanUpAfterFailure:
    On Error Resume Next
    If fileIsOpen Then Close #fileNum
    If Not rs Is Nothing Then
        If rs.State <> adStateClosed Then rs.Close
    End If
    Set rs = Nothing
    On Error GoTo 0

    ' Propagate to the caller, which shows Err.Description to the user.
    Err.Raise errNum, errSrc, errDesc
End Sub
回答1:
DAX Studio will allow you to query the data model in an Excel workbook and output to various formats, including flat files.
The query you'll need is just:
EVALUATE
<table name>
回答2:
I have found a working VBA solution (though greggy's answer also works for me). My table was too big to export in one chunk, so I loop over it and filter by month. This works and produces a 1.2 GB CSV once I append all the pieces together:
Function YYYYMM(aDate As Date)
    ' Encodes a date as the number yyyymm, e.g. 1 Dec 2000 -> 200012.
    ' The day of the month is ignored.
    Dim yearPart As Long
    Dim monthPart As Long

    ' Held as Long so that scaling the year by 100 cannot overflow a
    ' 16-bit Integer.
    yearPart = Year(aDate)
    monthPart = Month(aDate)

    YYYYMM = yearPart * 100 + monthPart
End Function
Function NextYYYYMM(YYYYMM As Long)
    ' Returns the yyyymm number of the month following the given yyyymm
    ' number, e.g. 200011 -> 200012 and 200012 -> 200101.

    ' Any month other than December: just move to the next month.
    If YYYYMM Mod 100 <> 12 Then
        NextYYYYMM = YYYYMM + 1
        Exit Function
    End If

    ' December: bump the year (+100) and go back from month 12 to month 1 (-11).
    NextYYYYMM = YYYYMM + 100 - 11
End Function
Public Sub CreatePowerPivotDmvInventory()
    ' Entry point: initializes the active workbook's Data Model, fetches its
    ' ADO connection and asks WriteDmvContent to export the table "table1".
    ' Shows "Finished" on success, or the error description on failure.
    Dim modelConn As ADODB.Connection
    Dim sourceTable As String
    Dim book As Workbook

    On Error GoTo FailureOutput

    Set book = ActiveWorkbook
    With book.Model
        .Initialize
        Set modelConn = .DataModelConnection.ModelConnection.ADOConnection
    End With

    ' Name of the model table handed to the export routine.
    sourceTable = "table1"
    WriteDmvContent sourceTable, modelConn

    MsgBox "Finished"
    Exit Sub

FailureOutput:
    MsgBox Err.Description
End Sub
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
    ' Exports the model table dmvName month by month, one CSV file per month.
    '
    ' For every month from 2000-12 through 2015-12 the table is filtered with
    '   EVALUATE(CALCULATETABLE(<table>, <table>[yearmonth] = "yyyy-mm"))
    ' and the result is written to H:\vba_tbl_<table>_<yyyy-mm>.csv: a header
    ' line with the field names followed by one line per record. A file is
    ' created for every month, even when the query returns no rows.
    '
    ' Parameters:
    '   dmvName - name of the model table to export.
    '   conn    - open ADO connection to the workbook's Data Model.
    '
    ' Errors: the current output file and recordset are always released, then
    ' the original error is re-raised so the caller's handler can report it.
    '
    ' NOTE(review): each value is wrapped in quote characters before being
    ' passed to Write #, which adds its own quoting for strings; this output
    ' format is kept unchanged because the files are consumed downstream.
    Const FILTER_FIELD As String = "yearmonth"

    Dim rs As ADODB.Recordset
    Dim mdx As String
    Dim i As Long
    Dim lastField As Long
    Dim eval_val As Variant
    Dim CurrYM As Long, LimYM As Long
    Dim String_Date As String
    Dim myFile As String
    Dim fileNum As Integer
    Dim fileIsOpen As Boolean
    Dim errNum As Long
    Dim errSrc As String
    Dim errDesc As String

    ' Without this statement the FailureOutput label below is never reached
    ' and a failure mid-export would leave the file handle open.
    On Error GoTo FailureOutput

    'If table small enough:
    'mdx = "EVALUATE " & dmvName
    'Otherwise filter and loop through year_month:
    CurrYM = YYYYMM(#12/1/2000#)
    LimYM = YYYYMM(#12/1/2015#)

    Do While CurrYM <= LimYM
        ' 200012 -> "2000-12"
        String_Date = CStr(Left(CurrYM, 4)) + "-" + CStr(Right(CurrYM, 2))
        Debug.Print String_Date

        eval_val = String_Date
        mdx = "EVALUATE(CALCULATETABLE(" & dmvName & ", " & dmvName & "[" & FILTER_FIELD & "] = """ & eval_val & """))"
        Debug.Print (mdx)

        Set rs = New ADODB.Recordset
        rs.ActiveConnection = conn
        rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic

        ' Setup CSV file. FreeFile avoids clashing with a handle that is
        ' already in use elsewhere in the session.
        myFile = "H:\vba_tbl_" & dmvName & "_" & eval_val & ".csv"
        Debug.Print (myFile)
        fileNum = FreeFile
        Open myFile For Output As #fileNum
        fileIsOpen = True

        lastField = rs.Fields.Count - 1

        ' Output column names. A trailing comma keeps the next value on the
        ' same line; the last field is written without it to end the line.
        For i = 0 To lastField
            If i = lastField Then
                Write #fileNum, """" & rs.Fields(i).Name & """"
            Else
                Write #fileNum, """" & rs.Fields(i).Name & """",
            End If
        Next i

        ' Output of the query results
        Do Until rs.EOF
            For i = 0 To lastField
                If i = lastField Then
                    Write #fileNum, """" & rs.Fields(i).Value & """"
                Else
                    Write #fileNum, """" & rs.Fields(i).Value & """",
                End If
            Next i
            rs.MoveNext
        Loop

        Close #fileNum
        fileIsOpen = False

        rs.Close
        Set rs = Nothing

        CurrYM = NextYYYYMM(CurrYM)
    Loop
    Exit Sub

FailureOutput:
    ' Remember the error, then leave the handler via Resume so that the
    ' clean-up code can run with its own error trapping.
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Resume CleanUpAfterFailure

CleanUpAfterFailure:
    On Error Resume Next
    If fileIsOpen Then Close #fileNum
    If Not rs Is Nothing Then
        If rs.State <> adStateClosed Then rs.Close
    End If
    Set rs = Nothing
    On Error GoTo 0

    ' Propagate to the caller, which shows Err.Description to the user.
    Err.Raise errNum, errSrc, errDesc
End Sub
来源:https://stackoverflow.com/questions/34748748/rip-data-from-power-pivot-item-data