Skip to content

Commit 9fbeae4

Browse files
committed
fix
1 parent 944f1cf commit 9fbeae4

File tree

4 files changed

+39
-11
lines changed

4 files changed

+39
-11
lines changed

.vs/slnx.sqlite

0 Bytes
Binary file not shown.

LanguageNetwork/GPT2/scripts/formatter.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -84,17 +84,17 @@ def coarse_formatter(text):
8484
if(lens >= 10):
8585
while count < lens - 5:
8686
#print("para: ", para ," | final: ", lens - 8)
87-
paras.append(text_list[para:para+5])
87+
paras.append(text_list[count:count+5])
8888
count += 5
8989
# print("现在添加段尾:", text_list[para:-1])
9090
if count == lens - 1:
9191
pass
9292
else:
9393
paras.append(text_list[count:-1])
9494
else:
95-
paras.append(3)
96-
paras.append(lens - 5)
97-
paras.append(lens - 3)
95+
paras.append(text_list[:3])
96+
paras.append(text_list[3:lens - 5])
97+
paras.append(text_list[lens - 5:lens])
9898
# print("最终段落为:", paras)
9999
for para in paras:
100100
# print("paras: ", para)

LanguageNetwork/GPT2/scripts/gdown.pl

+34-6
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
1+
12
#!/usr/bin/env perl
23
#
34
# Google Drive direct download of big files
45
# ./gdown.pl 'gdrive file url' ['desired file name']
56
#
67
# v1.0 by circulosmeos 04-2014.
78
# v1.1 by circulosmeos 01-2017.
8-
# v1.2, v1.3, v1.4 by circulosmeos 01-2019, 02-2019.
9+
# v1.2, 2.0 by circulosmeos 01-2019.
910
# //circulosmeos.wordpress.com/2014/04/12/google-drive-direct-download-of-big-files
1011
# Distributed under GPL 3 (//www.gnu.org/licenses/gpl-3.0.html)
1112
#
@@ -22,7 +23,7 @@
2223
die "\n./gdown.pl 'gdrive file url' [desired file name]\n\n" if $URL eq '';
2324

2425
my $FILENAME=shift;
25-
$FILENAME='gdown.'.strftime("%Y%m%d%H%M%S", localtime).'.'.substr(rand,2) if $FILENAME eq '';
26+
my $TEMP_FILENAME='gdown.'.strftime("%Y%m%d%H%M%S", localtime).'.'.substr(rand,2);
2627

2728
if ($URL=~m#^https?://drive.google.com/file/d/([^/]+)#) {
2829
$URL="https://docs.google.com/uc?id=$1&export=download";
@@ -33,8 +34,8 @@
3334

3435
execute_command();
3536

36-
while (-s $FILENAME < 100000) { # only if the file isn't the download yet
37-
open fFILENAME, '<', $FILENAME;
37+
while (-s $TEMP_FILENAME < 100000) { # only if the file isn't the download yet
38+
open fFILENAME, '<', $TEMP_FILENAME;
3839
$check=0;
3940
foreach (<fFILENAME>) {
4041
if (/href="(\/uc\?export=download[^"]+)/) {
@@ -63,13 +64,40 @@
6364
$URL=~s/confirm=([^;&]+)/confirm=$confirm/ if $confirm ne '';
6465

6566
execute_command();
67+
6668
}
6769

6870
unlink $TEMP;
6971

7072
sub execute_command() {
71-
$COMMAND="wget -q --show-progress --no-check-certificate --load-cookie $TEMP --save-cookie $TEMP \"$URL\"";
73+
my $OUTPUT_FILENAME = $TEMP_FILENAME;
74+
my $CONTINUE = '';
75+
76+
# check contents before download & if a $FILENAME has been indicated resume on content download
77+
# please, note that for this to work, wget must correctly provide --spider with --server-response (-S)
78+
if ( length($FILENAME) > 0 ) {
79+
$COMMAND="wget -q -S --no-check-certificate --spider --load-cookie $TEMP --save-cookie $TEMP \"$URL\" 2>&1";
80+
my @HEADERS=`$COMMAND`;
81+
foreach my $header (@HEADERS) {
82+
if ( $header =~ /Content-Type: (.+)/ ) {
83+
if ( $1 !~ 'text/html' ) {
84+
$OUTPUT_FILENAME = $FILENAME;
85+
$CONTINUE = '-c';
86+
}
87+
}
88+
}
89+
}
90+
91+
$COMMAND="wget $CONTINUE --progress=dot:giga --no-check-certificate --load-cookie $TEMP --save-cookie $TEMP \"$URL\"";
7292
$COMMAND.=" -O \"$FILENAME\"" if $FILENAME ne '';
73-
system ( $COMMAND );
93+
94+
my $OUTPUT = system( $COMMAND );
95+
if ( $OUTPUT == 2 ) { # do a clean exit with Ctrl+C
96+
unlink $TEMP;
97+
die "\nDownloading interrupted by user\n\n";
98+
} elsif ( $OUTPUT == 0 && length($CONTINUE)>0 ) { # do a clean exit with $FILENAME provided
99+
unlink $TEMP;
100+
die "\nDownloading complete\n\n";
101+
}
74102
return 1;
75103
}

colab_online.ipynb

+1-1
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@
5252
"!mkdir -p /home/EssayKiller_V2/LanguageNetwork/GPT2/finetune/trained_models\n",
5353
"\n",
5454
"%cd /home/EssayKiller_V2/LanguageNetwork/GPT2/finetune/\n",
55-
"!perl /home/EssayKiller_V2/LanguageNetwork/GPT2/scripts/gdown.pl https://drive.google.com/open?id=1ujWYTOvRLGJX0raH-f-lPZa3-RN58ZQx trained_models/model.ckpt-280000.data-00000-of-00001\n",
55+
"!perl /home/EssayKiller_V2/LanguageNetwork/GPT2/scripts/gdown.pl https://drive.google.com/file/d/1ujWYTOvRLGJX0raH-f-lPZa3-RN58ZQx trained_models/model.ckpt-280000.data-00000-of-00001\n",
5656
"!wget -q --show-progress https://github.com/EssayKillerBrain/EssayKiller/releases/download/v1.0/model.ckpt-280000.index -P /home/EssayKiller_V2/LanguageNetwork/GPT2/finetune/trained_models\n",
5757
"!wget -q --show-progress https://github.com/EssayKillerBrain/EssayKiller/releases/download/v1.0/model.ckpt-280000.meta -P /home/EssayKiller_V2/LanguageNetwork/GPT2/finetune/trained_models\n",
5858
"\n",

0 commit comments

Comments
 (0)