Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge retrde's branch #3

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 0 additions & 154 deletions Code/Malicious URL detection tool/main.py

This file was deleted.

159 changes: 159 additions & 0 deletions Code/Malicious URL detection tool/python/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import pandas as pd
import numpy as np
import sklearn

import random
from tkinter import *
from tkinter import ttk
import time

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer

# Flag that is not read or assigned anywhere in the visible code.
boolean= False
# Shared store for the fitted objects: trainer() fills it, in order, with the
# count vectorizer, the tf-idf vectorizer, the count-based model and the
# tf-idf-based model; callback() looks them up by index.
to_ret=[]


def getTkns(input):
    """Split a URL into a de-duplicated list of tokens.

    The URL is split on ``/``; every segment is split on ``-``; every
    resulting piece is additionally split on ``.``.  Both the dash-level
    pieces and their dot-level parts are kept, duplicates are dropped
    (first occurrence wins, so the order is deterministic) and the token
    ``'com'`` is removed.

    Args:
        input: The URL as ``str`` (UTF-8 ``bytes`` are decoded first).

    Returns:
        list[str]: Unique tokens in first-seen order, without ``'com'``.
        Empty tokens (e.g. from ``//``) are kept.
    """
    # Work on text.  Stringifying encoded bytes under Python 3 produces the
    # "b'...'" repr, which glued "b'" onto the first token and "'" onto the
    # last one.
    if isinstance(input, bytes):
        text = input.decode('utf-8')
    else:
        text = str(input)

    allTkns = []
    for segment in text.split('/'):
        tkns = segment.split('-')
        allTkns.extend(tkns)
        for tkn in tkns:
            allTkns.extend(tkn.split('.'))

    # dict.fromkeys drops duplicates while keeping first-seen order.
    allTkns = list(dict.fromkeys(allTkns))
    # Since .com is most common domain, we actually don't need it.
    if 'com' in allTkns:
        allTkns.remove('com')
    return allTkns


def trainer():
    """Train both URL classifiers and report their held-out accuracy.

    Loads ``data.csv``, shuffles its rows, vectorizes the first column with
    a ``CountVectorizer`` and a ``TfidfVectorizer`` (both tokenizing with
    ``getTkns``), fits one ``LogisticRegression`` per representation on an
    80/20 split and writes both test accuracies into the ``txt`` widget.

    Returns:
        list: The module-level ``to_ret`` list, holding in order the count
        vectorizer, the tf-idf vectorizer, the count-based model and the
        tf-idf-based model.
    """
    global to_ret
    global txt

    # ONE TIME EXECUTION STARTS HERE
    # read_csv already returns a DataFrame; malformed rows are skipped.
    frame = pd.read_csv("data.csv", error_bad_lines=False)
    data = np.array(frame)
    # random.shuffle swaps rows through NumPy views, which duplicates some
    # rows of a 2-D array and loses others; np.random.shuffle permutes the
    # rows along the first axis correctly.
    np.random.shuffle(data)

    # Column 0 is fed to the vectorizers, column 1 is the target label.
    y = [row[1] for row in data]
    corp = [row[0] for row in data]

    count_vectorizer = CountVectorizer(tokenizer=getTkns)
    tf_vectorizer = TfidfVectorizer(tokenizer=getTkns)

    X1 = count_vectorizer.fit_transform(corp)
    X2 = tf_vectorizer.fit_transform(corp)

    # Fit the Logistic Regression Model
    X1_train, X1_test, y1_train, y1_test = train_test_split(
        X1, y, test_size=0.2, random_state=42)
    X2_train, X2_test, y2_train, y2_test = train_test_split(
        X2, y, test_size=0.2, random_state=42)
    lgs_count = LogisticRegression()
    lgs_tf = LogisticRegression()
    lgs_count.fit(X1_train, y1_train)
    lgs_tf.fit(X2_train, y2_train)

    out1 = "The accuracy of Model with Count Vectorizer is " + str(lgs_count.score(X1_test, y1_test)) + "\n"
    out2 = "The accuracy of Model with TFIDF Vectorizer is " + str(lgs_tf.score(X2_test, y2_test)) + "\n"
    txt.insert(0.0, out1)
    txt.insert(1.0, out2)

    # Replace the contents in place so a repeated call does not leave stale
    # objects at indices 0-3 that callback() would keep using.
    to_ret[:] = [count_vectorizer, tf_vectorizer, lgs_count, lgs_tf]
    return to_ret




def callback():
    """Handle a click on the Check button.

    Reads the URL from ``entry`` and the selected vectorizer from ``choice``,
    then runs the matching vectorizer/model pair stored in ``to_ret`` and
    writes the prediction at the top of ``txt``.  A missing URL or a missing
    selection produces a hint in ``txt`` instead.
    """
    url = entry.get()
    if url == "":
        txt.insert(0.0, "Enter valid URL\n")
        return

    selected = choice.get()
    if selected == "":
        txt.insert(0.0, "Please choose any one of the two options.\n")
        return

    # Radio-button value -> (vectorizer index, model index) inside to_ret.
    slots = {"CV": (0, 2), "TF": (1, 3)}
    if selected not in slots:
        return

    vectorizer_idx, model_idx = slots[selected]
    features = to_ret[vectorizer_idx].transform([url])
    verdict = to_ret[model_idx].predict(features)
    txt.insert(0.0, str(url) + " is found " + str(verdict) + "\n")
    return



# ---- GUI construction (runs at import time) ----
# Main window, the variable backing the two radio buttons, and the single
# frame that holds every widget below.
root= Tk()
choice= StringVar()
frame= ttk.Frame(root)
frame.pack()
frame.config(height= 600, width= 800)
frame.config(relief= RAISED)

# Title banner.
label= ttk.Label(frame, text="Malicious URL Detection Tool")
label.config(foreground='#1aaedb',background='#dbbf23')
label.config(justify=CENTER)
label.config(font=('segoe',18,'bold'))
label.pack()

# Labels containing only newlines are used as vertical spacers.
label2= ttk.Label(frame, text='\n\n')
label2.pack()

# Vectorizer selection; callback() compares choice against 'CV' and 'TF'.
ttk.Radiobutton(frame,text='Use Count Vectorizer',variable=choice,value='CV').pack(anchor='w')
label3= ttk.Label(frame, text='\n')
label3.pack()

ttk.Radiobutton(frame,text='Use Tf-Idf Vectorizer',variable=choice,value='TF').pack(anchor='w')
label4= ttk.Label(frame, text='\n')
label4.pack()

label5= ttk.Label(frame, text='Enter any valid URL to Check')
label5.pack()

# URL input box; callback() reads it through entry.get().
to_check= StringVar()
entry= ttk.Entry(frame,textvariable=to_check,width=60)
entry.pack()



# The name label5 is rebound here to a spacer label.
label5= ttk.Label(frame, text='\n')
label5.pack()

# Indeterminate progress bar; nothing in the visible code starts or stops it.
progressbar= ttk.Progressbar(frame,orient= HORIZONTAL,length=200)
progressbar.pack()
progressbar.config(mode= 'indeterminate')


# NOTE(review): this binds the result of .pack(), not the Button itself, so
# `button` is presumably None — confirm before using the name.
button= ttk.Button(frame,text='Check',command=callback).pack()


# Output area that trainer() and callback() write their messages into.
txt= Text(frame,height=10, wrap= WORD)
txt.pack()
# Training runs before the event loop starts, i.e. before root.mainloop()
# begins processing events.
trainer()
root.mainloop()
4 changes: 4 additions & 0 deletions Code/Malicious URL detection tool/python/run-all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Run both Python scripts with sudo privileges, one after the other.

# NOTE(review): init.py is not part of this change — presumably a setup step; confirm.
sudo python3 init.py
# Start the tool itself.
sudo python3 main.py
14 changes: 14 additions & 0 deletions Code/Malicious URL detection tool/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
## Malicious URL detection tool

### Execution options:

- Python:
- Run main.py from a terminal or IDE after installing the dependencies listed in requirements.txt

- Web-app:
- Open index.html in any JavaScript-capable HTML5 browser with CSS3 support

### To-do:

- Complete web-app files index.html, add some styling and javascript code
- If possible, convert Python tool to a web-app!
1 change: 0 additions & 1 deletion Code/Malicious URL detection tool/run-python.sh

This file was deleted.

8 changes: 8 additions & 0 deletions Code/Malicious URL detection tool/web/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<!DOCTYPE html>
<!-- Placeholder page for the web version of the tool. -->
<html>
<head>
<meta charset="utf-8">
<title>Malicious URL Detection Tool</title>
</head>
<body>
Add site compatible page of tool
</body>
</html>
3 changes: 3 additions & 0 deletions Code/Malicious URL detection tool/web/js/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/**
 * Placeholder for the JavaScript port of the Python trainer() function.
 * Currently does nothing and returns undefined.
 */
function train() {
  // Create JS equivalent function for trainer() from python
}
6 changes: 6 additions & 0 deletions Code/Malicious URL detection tool/web/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
## Web version of Malicious URL detection tool

### To-do list:

- Add a free subdomain for site
- Complete the site :(
9 changes: 9 additions & 0 deletions Code/Malicious URL detection tool/web/style/main.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/* Declarations must be separated by semicolons; `image` is not a CSS
   property, so `background-image` is used instead.
   NOTE(review): `color` sets the text colour — `background-color` may have
   been intended; confirm. */
.background {
  color: white;
  background-image: none;
}

/* Declarations must be separated by semicolons, and a non-zero font-size
   needs a unit.
   NOTE(review): px is assumed for the unit, and `.h1` selects a class named
   "h1", not <h1> elements — confirm both. */
.h1 {
  color: black;
  font-size: 18px;
}
Binary file removed Research Paper/JAMM-abstract-paper.pdf
Binary file not shown.