问题
I'm trying to reshape a two-column data frame from long to wide format. Column 2 holds ticker symbols and column 1 holds the data fields that correspond to each ticker. I want every ticker collapsed into its own unique row, with the column 1 fields that belong to that ticker spread out into their own columns. See below for my example, which uses a small sample since the full data frame has 500 tickers and 4 fields:
# Closed End Fund Selector
# Daily-pricing API endpoint. The requested fields are listed one per line and
# joined with commas into the `props` query parameter; the trailing `_`
# parameter is reproduced verbatim from the original request.
url <- paste0(
  "https://www.cefconnect.com/api/v3/DailyPricing?props=",
  paste(
    c(
      "Ticker",
      "Name",
      "DistributionRateNAV",
      "LastUpdated",
      "Discount",
      "DistributionRatePrice",
      "ReturnOnNAV",
      "CategoryId",
      "CategoryName",
      "IsManagedDistribution",
      "Price",
      "PriceChange",
      "NAV",
      "NAVPublished",
      "Cusip"
    ),
    collapse = ","
  ),
  "/&_=1546832481302"
)
library(jsonlite)
library(rvest)
library(dplyr)
library(stringr)
# Request the daily-pricing endpoint and parse the JSON body.
# NOTE(review): html_session() is deprecated in rvest >= 1.0.0 in favour of
# session(); it is kept here so the script still runs on older rvest versions.
page <- html_session(url)
# The response body is read from page$response$content (assumed to be a raw
# vector, as the original readBin() call implies). The original
# readBin(..., what = "json") only worked by accident: "json" is not a valid
# mode, so readBin() fell back to typeof(what), i.e. "character".
# rawToChar() states the intent directly.
json <- rawToChar(page$response$content)
# Later code uses `df` as a data frame (df$Ticker, group_by(CategoryName)), so
# the payload is expected to simplify to one row per fund -- TODO confirm.
df <- fromJSON(json)
# Analyze and Group Closed End Funds by Investment Strategy and
# Average/Min/Max Discounts to NAV.
#
# For every CategoryName this yields exactly one row holding the mean
# discount plus the name, ticker and discount of the fund with the smallest
# and the largest discount.
#
# which.min()/which.max() are used instead of which(Discount == min(Discount)):
#   * they skip NA discounts, so a missing value no longer makes the lookup
#     come back empty (min()/max() without na.rm returned NA, and
#     `Discount == NA` matches nothing);
#   * they return a single index, so tied funds no longer produce several
#     values per group (the first tied fund wins).
# The trailing `[1]` turns the zero-length result of an all-NA group into NA,
# keeping every summary column at length one.
df2 <- df %>%
  group_by(CategoryName) %>%
  summarize(
    Category_Fund_Avg_NAV_Difference = mean(Discount, na.rm = TRUE),
    Min_NAV_Fund = Name[which.min(Discount)][1],
    Min_NAV_Ticker = Ticker[which.min(Discount)][1],
    Min_Nav_Fund_Difference = Discount[which.min(Discount)][1],
    Max_NAV_Fund = Name[which.max(Discount)][1],
    Max_NAV_Ticker = Ticker[which.max(Discount)][1],
    Max_Nav_Fund_Difference = Discount[which.max(Discount)][1]
  )
# Convert the summarised tibble to a plain data frame.
df2 <- as.data.frame(df2)
# Scrape the pricing summary grid from every fund's detail page and stack the
# results into one long data frame (`df6`) with two columns:
#   test22 - "<period> <metric> <value>", e.g. "Current NAV $7.11"
#   Ticker - the fund's ticker symbol
# Uses read_html()/html_nodes()/html_text() from rvest and
# str_replace_all()/str_trim() from stringr.

# Prefix of every fund detail page; the ticker symbol is appended to it.
fund_url_prefix <- "https://www.cefconnect.com/fund/"

# Split the raw text of the summary grid into whitespace-separated tokens.
#
# grid_text: character vector returned by html_text() for the grid node.
# Returns a character vector of tokens (NULL when there is no text). For a
# complete grid the layout is: three metric names, then four groups of one
# period label followed by its three values.
tokenize_summary_grid <- function(grid_text) {
  lines <- unlist(strsplit(grid_text, split = "\n"))
  lines <- lines[lines != " "]
  lines <- str_replace_all(lines, pattern = "[\\t\\r]", replacement = "")
  lines <- str_trim(lines, side = "left")
  lines <- lines[lines != ""]

  # The page text runs labels and values together; re-insert the separators.
  # A named vector makes str_replace_all() apply the pairs in order.
  label_fixes <- c(
    "SharePriceNAVPremium/Discount" = "SharePrice NAV Premium/Discount",
    "Current" = "Current ",
    "52 Wk Avg" = "52WkAvg ",
    "52 Wk High" = "52WkHigh ",
    "52 Wk Low" = "52WkLow ",
    "-" = " -"
  )
  lines <- str_replace_all(lines, label_fixes)

  # Every value carries two decimals, so a space after ".dd" separates
  # adjacent numbers; the "%" that this detaches is then pulled back onto its
  # number.
  lines <- gsub("(\\.\\d{2})", "\\1 ", lines, perl = TRUE)
  lines <- trimws(gsub("\\s%", "% ", lines))
  # This creates a space between prices based on a number followed by a '$'
  lines <- gsub("([0-9])([$])", "\\1 \\2", lines)

  # Separate by whitespace so each word becomes its own element for indexing.
  unlist(strsplit(lines, "\\s+"))
}

# Download one fund page and return its summary grid in long format.
#
# fund_url: full URL of the fund detail page.
# Returns a 12-row data frame with columns `test22` and `Ticker`.
# Raises an error when the page yields fewer than the 19 tokens a complete
# grid contains, instead of silently emitting rows built from "NA" strings.
scrape_fund_summary <- function(fund_url) {
  grid_text <- read_html(fund_url) %>%
    html_nodes("#ContentPlaceHolder1_cph_main_cph_main_SummaryGrid") %>%
    html_text()
  tokens <- tokenize_summary_grid(grid_text)
  if (length(tokens) < 19L) {
    stop(
      "summary grid has ", length(tokens), " tokens, expected at least 19",
      call. = FALSE
    )
  }

  metric_pos <- 1:3                      # SharePrice, NAV, Premium/Discount
  period_pos <- c(4L, 8L, 12L, 16L)      # Current, 52WkAvg, 52WkHigh, 52WkLow
  # The three values of a period directly follow its label:
  # 5 6 7, 9 10 11, 13 14 15, 17 18 19.
  value_pos <- as.vector(outer(metric_pos, period_pos, `+`))

  data.frame(
    test22 = paste(
      rep(tokens[period_pos], each = length(metric_pos)),
      rep(tokens[metric_pos], times = length(period_pos)),
      tokens[value_pos]
    ),
    # Fixed-string match: the dots in the URL must not act as regex wildcards.
    Ticker = sub(fund_url_prefix, "", fund_url, fixed = TRUE)
  )
}

tickers <- paste0(fund_url_prefix, df$Ticker)

# One request per fund. A page that cannot be fetched or parsed is reported
# with a warning and yields NULL, so a single bad page does not abort the
# whole run.
lst_scraped_data <- lapply(tickers, FUN = function(URLLink) {
  tryCatch(
    scrape_fund_summary(URLLink),
    error = function(e) {
      warning(
        "Skipping ", URLLink, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
})

# Stack the per-fund results, leaving out the funds that were skipped.
df6 <- do.call(rbind, Filter(Negate(is.null), lst_scraped_data))
Ideally, after the reformatting each ticker has its own unique row, with the ticker as the row name and, in this case, 12 corresponding columns holding the contents of the "test22" column of "df6"; the names of those columns aren't important at this stage. Help is much appreciated!
This is a sample of the first 25 rows of the output from the above script:
test22 Ticker
1 Current SharePrice $6.57 MFM
2 Current NAV $7.11 MFM
3 Current Premium/Discount -7.59% MFM
4 52WkAvg SharePrice $6.55 MFM
5 52WkAvg NAV $7.21 MFM
6 52WkAvg Premium/Discount -9.19% MFM
7 52WkHigh SharePrice $6.88 MFM
8 52WkHigh NAV $7.34 MFM
9 52WkHigh Premium/Discount -5.88% MFM
10 52WkLow SharePrice $6.05 MFM
11 52WkLow NAV $7.03 MFM
12 52WkLow Premium/Discount -14.43% MFM
13 Current SharePrice $4.84 CXE
14 Current NAV $5.21 CXE
15 Current Premium/Discount -7.10% CXE
16 52WkAvg SharePrice $4.91 CXE
17 52WkAvg NAV $5.29 CXE
18 52WkAvg Premium/Discount -7.26% CXE
19 52WkHigh SharePrice $5.31 CXE
20 52WkHigh NAV $5.37 CXE
21 52WkHigh Premium/Discount -1.12% CXE
22 52WkLow SharePrice $4.58 CXE
23 52WkLow NAV $5.16 CXE
24 52WkLow Premium/Discount -11.92% CXE
25 Current SharePrice $4.33 CMU
This is how I'd like the reformatted output to look:
After using dput -->
test22 Ticker
1 Current SharePrice $6.57 MFM 2 Current NAV $7.11 MFM 3 Current Premium/Discount -7.59% MFM 4 52WkAvg SharePrice $6.55 MFM 5 52WkAvg NAV $7.21 MFM 6 52WkAvg Premium/Discount -9.19% MFM 7 52WkHigh SharePrice $6.88 MFM 8 52WkHigh NAV $7.34 MFM 9 52WkHigh Premium/Discount -5.88% MFM 10 52WkLow SharePrice $6.05 MFM 11 52WkLow NAV $7.03 MFM 12 52WkLow Premium/Discount -14.43% MFM 13 Current SharePrice $4.84 CXE 14 Current NAV $5.21 CXE 15 Current Premium/Discount -7.10% CXE 16 52WkAvg SharePrice $4.91 CXE 17 52WkAvg NAV $5.29 CXE 18 52WkAvg Premium/Discount -7.26% CXE 19 52WkHigh SharePrice $5.31 CXE
来源:https://stackoverflow.com/questions/54185678/reformat-and-collapse-data-frame-based-on-corresponding-column-identifier-code-r