Commit 75f663e

Merge pull request #287 from massimoaria/develop
Develop to CRAN
2 parents 5e3c37c + 310c25d commit 75f663e

33 files changed, with 3996 additions and 3564 deletions.

.gitignore

Lines changed: 3 additions & 0 deletions
@@ -6,11 +6,14 @@
 .txt
 .bib
 .Ruserdata
+.DS_Store
 VOSviewer.jar
 network.net
 Rubbish
 desktop.ini
 vignette.txt
 inst/doc
+inst/biblioshiny/__MACOSX
+inst/biblioshiny/rsconnect
 _gh-pages
 
DESCRIPTION

Lines changed: 3 additions & 3 deletions
@@ -1,7 +1,7 @@
 Package: bibliometrix
 Type: Package
 Title: Comprehensive Science Mapping Analysis
-Version: 4.0.2
+Version: 4.1.0
 Authors@R: c(
     person(given = "Massimo",
            family = "Aria",
@@ -48,7 +48,7 @@ Imports: stats,
         shiny,
         SnowballC,
         stringdist,
-        stringr,
+        stringi,
         tidyr,
         tidytext
 Suggests:
@@ -59,6 +59,6 @@ Suggests:
     shinycssloaders,
     visNetwork,
     wordcloud2
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.3
 NeedsCompilation: no
 Config/testthat/edition: 3

NAMESPACE

Lines changed: 2 additions & 3 deletions
@@ -26,6 +26,7 @@ export(localCitations)
 export(lotka)
 export(mergeDbSources)
 export(metaTagExtraction)
+export(missingData)
 export(net2VOSviewer)
 export(networkPlot)
 export(networkStat)
@@ -56,6 +57,7 @@ import(readr)
 import(readxl)
 import(shiny)
 import(stats)
+import(stringi)
 import(tidytext)
 importFrom(DT,DTOutput)
 importFrom(DT,datatable)
@@ -294,9 +296,6 @@ importFrom(rscopus,author_df_orig)
 importFrom(rscopus,author_search)
 importFrom(rscopus,get_complete_author_info)
 importFrom(stringdist,stringdistmatrix)
-importFrom(stringr,str_extract_all)
-importFrom(stringr,str_locate_all)
-importFrom(stringr,str_replace_all)
 importFrom(tidyr,drop_na)
 importFrom(tidyr,gather)
 importFrom(tidyr,pivot_longer)
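Taken together with the DESCRIPTION change, this swaps the stringr dependency for stringi. A minimal sketch of the one-to-one substitutions used across the commit (the input string is made up, not repository code):

```r
library(stringi)

x <- "conceptual structure (2023)"

stri_trans_totitle(x)                          # replaces stringr::str_to_title()    (histPlot.R)
stri_trans_toupper(x, locale = "en")           # replaces stringr::str_to_upper()    (toUpper.R)
stri_extract_all_regex(x, "[[:digit:]]{4}")    # replaces stringr::str_extract_all() (histPlot.R)
stri_replace_all_regex(x, "\\(|\\)", "")       # replaces stringr::str_replace_all() (keywordGrowth.R)
stri_locate_all_regex(x, "structure",
                      omit_no_match = TRUE)    # replaces stringr::str_locate_all()  (histNetwork.R)
```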

NEWS

Lines changed: 14 additions & 0 deletions
@@ -1,3 +1,17 @@
+bibliometrix v4.0.1 (Release date: 2023-01-13)
+
+Features:
+* Added a new function missingData() to check the completeness of metadata included in a bibliographic data frame
+* Biblioshiny: Added the ability to create an excel report by adding step by step results of different analysis
+* Biblioshiny: Added a popup that returns the results of the metadata completeness check of imported collections
+* Biblioshiny: Revamped interface with floating options menu and more space for graphical analysis results
+
+Changes:
+* Several bug fixes
+* Computational speed improvements
+
+
+
 bibliometrix v4.0.1 (Release date: 2022-09-16)
 
 Features:

R/bib2df.R

Lines changed: 3 additions & 1 deletion
@@ -22,8 +22,10 @@ bib2df<-function(D, dbsource = "isi"){
 
   if (dbsource == "isi") D <- gsub(" = \\{","={",D)
 
+  D <- gsub("\\\t","",gsub(" = \\{","=\\{",D)) # to work also with new scopus bib format
+
   D[Papers] <- paste("Paper={",D[Papers],sep="")
-  #ii <- regexpr("\\{",D[Papers])
+
   ind <- regexpr("=\\{",D) # sep among tags and contents
   ind[Papers] <- 6
 
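A minimal sketch (made-up .bib fragment, not repository data) of what the added line handles: the newer Scopus BibTeX export writes tags as "tag = {value}" and indents with tabs, so the line first normalizes " = {" to "={" and then strips the stray tab characters before the tag/content split on "={".

```r
D <- c("\tauthor = {Aria, Massimo and Cuccurullo, Corrado},",
       "\tjournal = {Journal of Informetrics},")
gsub("\\\t", "", gsub(" = \\{", "=\\{", D))
# [1] "author={Aria, Massimo and Cuccurullo, Corrado},"
# [2] "journal={Journal of Informetrics},"
```
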
R/conceptualStructure.R

Lines changed: 18 additions & 20 deletions
@@ -409,36 +409,34 @@ conceptualStructure<-function(M,field="ID", ngrams=1, method="MCA", quali.supp=N
   b_doc_TC <- b_doc_TC + annotation_custom(logo, xmin = xl[1], xmax = xl[2], ymin = yl[1], ymax = yl[2])
   ##
 
-  params <- list(field = field,
-                 ngrams = ngrams,
-                 method=method,
-                 quali.supp=quali.supp,
-                 quanti.supp=quanti.supp,
-                 minDegree=minDegree,
-                 clust=clust,
-                 k.max=k.max,
-                 stemming = stemming,
-                 labelsize=labelsize,
-                 documents=documents,
-                 graph=graph,
-                 remove.terms = remove.terms,
-                 synonyms = synonyms)
-
-  params <- data.frame(params=names(unlist(params)),values=unlist(params), row.names = NULL)
 
   if (isTRUE(graph)){plot(b_doc_TC)}
 
   semanticResults=list(net=CW,res=res.mca,km.res=km.res,graph_terms=b,graph_dendogram=b_dend,
-                       graph_documents_Contrib=b_doc,graph_documents_TC=b_doc_TC,docCoord=docCoord,
-                       params=params)
+                       graph_documents_Contrib=b_doc,graph_documents_TC=b_doc_TC,docCoord=docCoord)
 
   }else{
 
   semanticResults=list(net=CW,res=res.mca,km.res=km.res,graph_terms=b,graph_dendogram=b_dend,
-                       graph_documents_Contrib=NULL,graph_documents_TC=NULL,docCoord=NULL,
-                       params=params)
+                       graph_documents_Contrib=NULL,graph_documents_TC=NULL,docCoord=NULL)
   }
 
+  params <- list(field = field,
+                 ngrams = ngrams,
+                 method=method,
+                 quali.supp=quali.supp,
+                 quanti.supp=quanti.supp,
+                 minDegree=minDegree,
+                 clust=clust,
+                 k.max=k.max,
+                 stemming = stemming,
+                 labelsize=labelsize,
+                 documents=documents,
+                 graph=graph,
+                 remove.terms = remove.terms,
+                 synonyms = synonyms)
+
+  semanticResults$params <- data.frame(params=names(unlist(params)),values=unlist(params), row.names = NULL)
 
 
   return(semanticResults)
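The refactor only moves the bookkeeping: semanticResults is built first and the call parameters are attached afterwards as a two-column data frame. A sketch of that pattern with a shortened, made-up parameter list:

```r
params <- list(field = "ID", ngrams = 1, method = "MCA", clust = "auto")
# unlist() flattens the list into a named character vector; the data frame
# then holds one row per parameter name/value pair.
data.frame(params = names(unlist(params)), values = unlist(params), row.names = NULL)
#   params values
# 1  field     ID
# 2 ngrams      1
# 3 method    MCA
# 4  clust   auto
```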

R/couplingMap.R

Lines changed: 17 additions & 2 deletions
@@ -195,7 +195,22 @@ couplingMap <- function(M, analysis = "documents", field="CR", n=500, label.term
   row.names(df)=NULL
   df <- df %>% rename(items = .data$words)
 
-  results=list(map=g, clusters=df, data=df_lab,nclust=dim(df)[1], NCS = D, net=Net)
+  params <- list(analysis = analysis,
+                 field=field,
+                 n=n,
+                 minfreq=minfreq,
+                 label.term=label.term,
+                 ngrams=ngrams,
+                 impact.measure=impact.measure,
+                 stemming=stemming,
+                 n.labels=n.labels,
+                 size=size,
+                 community.repulsion = community.repulsion,
+                 repel=repel,
+                 cluster=cluster)
+  params <- data.frame(params=names(unlist(params)),values=unlist(params), row.names = NULL)
+
+  results=list(map=g, clusters=df, data=df_lab,nclust=dim(df)[1], NCS = D, net=Net, params=params)
   return(results)
 }
 
@@ -315,7 +330,7 @@ labeling <- function(M, df_lab, term, n, n.labels, analysis, ngrams){
 
   #clusters <- unique(df$Cluster)
   #w <- character(length(clusters))
-
+  df$SR <- df[,1]
   tab_global <- tableTag(df, term)
   tab_global <- data.frame(label=names(tab_global),tot=as.numeric(tab_global), n=nrow(M),stringsAsFactors = FALSE)
 
R/dimensions2df.R

Lines changed: 7 additions & 2 deletions
@@ -76,7 +76,7 @@ dimensions2df <- function(file, format = "csv") {
 
 
 postprocessingDim <- function(DATA) {
-  DATA <- data.frame(lapply(DATA, toupper), stringsAsFactors = FALSE)
+  # DATA <- data.frame(lapply(DATA, toupper), stringsAsFactors = FALSE)
 
   ## Converting original references in WOS format (AU, PY, SO, VOL, NUM, DOI)
   if ("Cited.references" %in% names(DATA)) {
@@ -232,15 +232,20 @@ postprocessingDim <- function(DATA) {
   if (("SO" %in% names(DATA)) & ("Anthology.title" %in% names(DATA))) {
     ind <- which(is.na(DATA$SO) | DATA$SO=="")
     DATA$SO[ind] <- DATA$Anthology.title[ind]
-    DATA$SO[DATA$SO==""] <- NA
+    DATA$SO[is.na(DATA$SO) | DATA$SO==""] <- "NA"
   }
 
   if (!("SO" %in% names(DATA))) {
     DATA$SO <- "NA"
   }
 
+  ####
+  cat("\nCreating ISO Source names...")
   DATA$JI <- sapply(DATA$SO, AbbrevTitle, USE.NAMES = FALSE)
   DATA$J9 <- gsub("\\.","",DATA$JI)
+  ####
+
+  DATA <- data.frame(lapply(DATA, toupper), stringsAsFactors = FALSE)
 
   DATA$PY <- as.numeric(DATA$PY)
 
R/histNetwork.R

Lines changed: 2 additions & 2 deletions
@@ -203,8 +203,8 @@ scopus <- function(M, min.citations, sep, network, verbose){
   nCum <- c(1, cumsum(n[-length(n)]))
   CR <- paste(CR, collapse = " ")
 
-  L <- str_locate_all(CR, TIpost)
-
+  #L <- str_locate_all(CR, TIpost)
+  L <- stringi::stri_locate_all_regex(CR,TIpost, omit_no_match = TRUE)
 
   LCS <- lengths(L) / 2
 
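The omit_no_match = TRUE argument is what keeps the citation count correct: by default stri_locate_all_regex() returns a one-row NA matrix when a pattern is never found, which would inflate the lengths(L) / 2 count, whereas omit_no_match = TRUE returns a zero-row matrix, mirroring the replaced stringr::str_locate_all(). A minimal sketch with made-up data:

```r
library(stringi)
# one pattern that matches twice and one that never occurs
L <- stri_locate_all_regex("A B A", c("A", "Z"), omit_no_match = TRUE)
lengths(L) / 2
# [1] 2 0   (with the default omit_no_match = FALSE the second count would be 1)
```
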
R/histPlot.R

Lines changed: 4 additions & 15 deletions
@@ -74,14 +74,14 @@ histPlot<-function(histResults, n=20, size = 5, labelsize = 5, title_as_label =
 
   switch(label,
          title={
-           title <- strsplit(stringr::str_to_title(V(bsk.network)$title), " ")
+           title <- strsplit(stringi::stri_trans_totitle(V(bsk.network)$title), " ")
            V(bsk.network)$id <- unlist(lapply(title, function(l){
              n <- floor(length(l)/2)
              paste0(paste(l[1:n], collapse=" ", sep=""),"\n",paste(l[(n+1):length(l)], collapse=" ", sep=""))
            }))
          },
          keywords={
-           kw <- strsplit(stringr::str_to_title(V(bsk.network)$keywords), ";")
+           kw <- strsplit(stringi::stri_trans_totitle(V(bsk.network)$keywords), ";")
            kw[is.na(kw)] <- "Not Available"
            V(bsk.network)$id <- unlist(lapply(kw, function(l){
              if (length(l)>1){
@@ -92,7 +92,7 @@ histPlot<-function(histResults, n=20, size = 5, labelsize = 5, title_as_label =
            }))
          },
          keywordsplus={
-           kw <- strsplit(stringr::str_to_title(V(bsk.network)$keywordsplus), ";")
+           kw <- strsplit(stringi::stri_trans_totitle(V(bsk.network)$keywordsplus), ";")
            kw[is.na(kw)] <- "Not Available"
            V(bsk.network)$id <- unlist(lapply(kw, function(l){
              if (length(l)>1){
@@ -107,24 +107,13 @@ histPlot<-function(histResults, n=20, size = 5, labelsize = 5, title_as_label =
   }
   )
 
-  # if (isTRUE(title_as_label)){
-  #   title <- strsplit(stringr::str_to_title(V(bsk.network)$title), " ")
-  #   V(bsk.network)$id <- unlist(lapply(title, function(l){
-  #     n <- floor(length(l)/2)
-  #     paste0(paste(l[1:n], collapse=" ", sep=""),"\n",paste(l[(n+1):length(l)], collapse=" ", sep=""))
-  #   }))
-  #   #V(bsk.network)$id <- tolower(paste(substr(V(bsk.network)$title,1,50),"...",sep=""))
-  # } else {
-  #   V(bsk.network)$id <- tolower(unlist(RR))
-  # }
-
   # Compute node degrees (#links) and use that to set node size:
   deg <- LCS
   V(bsk.network)$size <- size
   #rep(size,length(V(bsk.network)))}
 
   #Years=histResults$histData$Year[ind]
-  Years <- as.numeric(unlist(str_extract_all(unlist(RR),"[[:digit:]]{4}$")))
+  Years <- as.numeric(unlist(stringi::stri_extract_all_regex(unlist(RR),"[[:digit:]]{4}$")))
   V(bsk.network)$years <- Years
 
   # Remove loops
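The year extraction is a drop-in swap; a quick sketch with made-up reference strings (in the surrounding code, RR holds the cited-reference labels):

```r
library(stringi)
RR <- c("ARIA M, 2017", "CUCCURULLO C, 2014")
# pull the trailing four-digit year from each reference label
as.numeric(unlist(stri_extract_all_regex(RR, "[[:digit:]]{4}$")))
# [1] 2017 2014
```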

R/keywordGrowth.R

Lines changed: 2 additions & 2 deletions
@@ -52,8 +52,8 @@ KeywordGrowth <- function(M, Tag = "ID", sep = ";", top=10, cdf=TRUE, remove.ter
   A <- A %>%
     mutate(
       # Tab = str_replace_all(Tab, paste(sold[[i]], collapse="|",sep=""),snew[i])
-      Tab= str_replace_all(Tab, str_replace_all(str_replace_all(paste(sold[[i]], collapse="|",sep=""),"\\(","\\\\("),"\\)","\\\\)"),snew[i])
-
+      #Tab= str_replace_all(Tab, str_replace_all(str_replace_all(paste(sold[[i]], collapse="|",sep=""),"\\(","\\\\("),"\\)","\\\\)"),snew[i]),
+      Tab= stringi::stri_replace_all_regex(Tab, stringi::stri_replace_all_regex(stringi::stri_replace_all_regex(paste(sold[[i]], collapse="|",sep=""),"\\(","\\\\("),"\\)","\\\\)"),snew[i])
     )
   }
 }
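The nested calls escape parentheses in the synonym terms so they match literally once the terms are pasted into an alternation regex. A minimal sketch with made-up synonym data (in the surrounding loop, sold holds the terms to merge and snew the replacement label):

```r
library(stringi)
sold <- c("machine learning (ml)", "statistical learning")
snew <- "MACHINE LEARNING"
pattern <- paste(sold, collapse = "|")
# escape "(" and ")" so they are matched literally inside the alternation
pattern <- stri_replace_all_regex(
  stri_replace_all_regex(pattern, "\\(", "\\\\("), "\\)", "\\\\)")
stri_replace_all_regex("trends in machine learning (ml)", pattern, snew)
# [1] "trends in MACHINE LEARNING"
```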

R/missingData.R

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+#' Completeness of bibliographic metadata
+#'
+#' It calculates the percentage of missing data in the metadata of a bibliographic data frame.
+#'
+#' Each metadata is assigned a status c("Excellent," "Good," "Acceptable", "Poor", "Critical," "Completely missing")
+#' depending on the percentage of missing data. In particular, the column *status* classifies the percentage of missing
+#' value in 5 categories: "Excellent" (0%), "Good" (0.01% to 10.00%), "Acceptable" (from 10.01% to 20.00%),
+#' "Poor" (from 20.01% to 50.00%), "Critical" (from 50.01% to 99.99%), "Completely missing" (100%).
+#'
+#' The results of the function allow us to understand which analyses can be performed with bibliometrix
+#' and which cannot based on the completeness (or status) of different metadata.
+#' @param M is a bibliographic data frame obtained by \code{\link{convert2df}} function.
+#'
+#' @return The function \code{missingData} returns a list containing two objects:
+#' \tabular{lll}{
+#' \code{allTags} \tab \tab is a data frame including results for all original metadata tags from the collection\cr
+#' \code{mandatoryTags}\tab \tab is a data frame that included only the tags needed for analysis with bibliometrix and biblioshiny.}
+#'
+#' @examples
+#' data(scientometrics, package = "bibliometrixData")
+#' res <- missingData(scientometrics)
+#' print(res$mandatoryTags)
+#'
+#' @export
+#'
+missingData <- function(M) {
+  cols <- names(M)
+  missing_counts <- sapply(cols, function(x){
+    sum(is.na(M[,x]) | M[,x] %in% c("NA,0000,NA","NA",""))
+  })
+  missing_pct <- round(missing_counts/nrow(M) * 100, 2)
+  df_all <- data.frame(cols, missing_counts, missing_pct)
+
+  tag <- unlist(
+    strsplit(
+      "AB,AU,C1,CR,DE,DI,DT,ID,LA,NR,PY,RP,SO,TC,TI,WC",","
+    )
+  )
+  description <- trimws(unlist(
+    strsplit(
+      "Abstract, Author,Affiliation,Cited References,Keywords,DOI,Document Type,Keywords Plus,Language,Number of Cited References,
+      Publication Year,Corresponding Author, Journal, Total Citation, Title, Science Categories", ","
+    )
+  ))
+
+  df_all <- df_all %>%
+    mutate(status = status(missing_pct)) %>%
+    replace_na(replace = list(missing_counts = nrow(M), missing_pct = 100))
+
+  df_tags <- data.frame(tag, description) %>%
+    left_join(df_all, by = c("tag" = "cols")) %>%
+    replace_na(replace = list(missing_counts = nrow(M), missing_pct = 100, status = "Completely missing")) %>%
+    arrange(missing_pct,description)
+
+  results <- list(allTags=df_all, mandatoryTags=df_tags)
+  return(results)
+}
+
+status <- function(x){
+  y <- character(length(x))
+  y[x==0] <- "Excellent"
+  y[x>0 & x<= 10] <- "Good"
+  y[x>10 & x<= 20] <- "Acceptable"
+  y[x>20 & x<=50] <- "Poor"
+  y[x>50 & x<100] <- "Critical"
+  y[is.na(x) | x==100] <- "Completely missing"
+  return(y)
+}
+
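Exported usage is the @examples call in the roxygen block above; the internal status() helper (defined above, not exported) maps missing-data percentages onto the six labels documented there, which can be checked directly:

```r
status(c(0, 5, 15, 35, 75, 100))
# [1] "Excellent"          "Good"               "Acceptable"
# [4] "Poor"               "Critical"           "Completely missing"
```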

R/rpys.R

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ y <- c(min(c(RPYS$Citations,RPYS$diffMedian)),min(c(RPYS$Citations,RPYS$diffMedi
 
   g=ggplot(RPYS, aes(x=.data$Year ,y=.data$Citations,text=paste("Year: ",.data$Year,"\nN. of References: ",.data$Citations)))+
     geom_line(aes(group="NA")) +
-    geom_area(aes(group="NA"),fill = 'grey90', alpha = .5) +
+    #geom_area(aes(group="NA"),fill = 'grey90', alpha = .5) +
     #geom_hline(aes(yintercept=0, color = 'grey'))+
     geom_line(aes(x=.data$Year,y=.data$diffMedian, color="firebrick", group="NA"))+
     labs(x = 'Year'

R/thematicEvolution.R

Lines changed: 2 additions & 1 deletion
@@ -11,7 +11,7 @@
 #'
 #' @param M is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}.
 #' @param field is a character object. It indicates the content field to use. Field can be one of c=("ID","DE","TI","AB"). Default value is \code{field="ID"}.
-#' @param years is a numeric vector of two or more unique cut points.
+#' @param years is a numeric vector of one or more unique cut points.
 #' @param n is numerical. It indicates the number of words to use in the network analysis
 #' @param minFreq is numerical. It indicates the min frequency of words included in to a cluster.
 #' @param ngrams is an integer between 1 and 4. It indicates the type of n-gram to extract from texts.
@@ -57,6 +57,7 @@ thematicEvolution <- function(M, field = "ID", years, n = 250, minFreq = 2, size
   resk <- thematicMap(Mk, field = field, n = n, minfreq = minFreq, ngrams=ngrams,
                       stemming = stemming, size = size, n.labels = n.labels,
                       repel = repel, remove.terms = remove.terms, synonyms = synonyms, cluster=cluster)
+  resk$params <- resk$params %>% dplyr::filter(.data$params!="minfreq")
   res[[k]] <- resk
   net[[k]] <- resk$net
 }
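With the relaxed documentation a single cut point is now enough, splitting the collection into two time slices. A hypothetical call (M is assumed to be a bibliographic data frame returned by convert2df()):

```r
# e.g. one slice up to 2015 and one after 2015
nexus <- thematicEvolution(M, field = "ID", years = c(2015), n = 250, minFreq = 2)
```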

R/toUpper.R

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 toUpper <- function(D){
-  stringr::str_to_upper(D, locale = "en")
+  stringi::stri_trans_toupper(D, locale = "en")
 }
