Skip to content

Commit

Permalink
Merge pull request #81 from fossology/feat/nirjas/update-interface
Browse files Browse the repository at this point in the history
feat(nirjas): Update interface with Nirjas 0.0.5

Reviewed-By: [email protected]
Tested-By: [email protected]
  • Loading branch information
ag4ums authored Jan 21, 2021
2 parents a147f95 + fdc5f15 commit 89476ad
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 56 deletions.
104 changes: 53 additions & 51 deletions atarashi/libs/commentPreprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,65 +20,69 @@
"""

import argparse
from nirjas import extract
import json
import os
import sys
import re
import string
import tempfile

from nirjas import extract as commentExtract, LanguageMapper

__author__ = "Aman Jain"
__email__ = "[email protected]"

args = None

def licenseComment(data):
    """Return the extracted comment that most likely holds the license text.

    ``data`` is expected to be a mapping that may contain the keys
    ``"multi_line_comment"``, ``"cont_single_line_comment"`` and
    ``"single_line_comment"``; each present key maps to an iterable of
    dicts that carry the comment text under ``'comment'``.
    (Presumably this is the parsed Nirjas output -- verify against caller.)

    Selection rules, applied while walking the groups in the order above:

    * The first comment containing ``spdx-license-identifier``
      (case-insensitive) is returned immediately.
    * Otherwise every comment is scored by how many license-related
      keywords occur in it as case-insensitive substrings, and the
      highest-scoring comment wins. On a tie the earliest comment is kept.

    :param data: Mapping of comment-group name to a list of comment dicts
    :return: The chosen comment text, or ``""`` if no comment matched any
             keyword
    """
    # NOTE: 'permitted' and 'granted' used to be fused into the single
    # keyword 'permittedgranted' by a missing comma (implicit string
    # literal concatenation); they are now two separate keywords.
    # NOTE(review): 'merchantibility' looks like a misspelling of
    # 'merchantability' -- left untouched here, confirm before changing.
    match_list = [
        'source', 'free', 'under', 'use', 'copyright', 'grant', 'software',
        'license', 'licence', 'agreement', 'distribute', 'redistribution',
        'liability', 'rights', 'reserved', 'general', 'public', 'modify',
        'modified', 'modification', 'permission', 'permitted', 'granted',
        'distributed', 'notice', 'distribution', 'terms', 'freely',
        'licensed', 'merchantibility', 'redistributed', 'see', 'read',
        '(c)', 'copying', 'legal', 'licensing', 'spdx',
    ]

    # Groups are scanned in this order; it decides which comment wins a
    # tie and which SPDX-tagged comment is seen first.
    comment_groups = (
        "multi_line_comment",
        "cont_single_line_comment",
        "single_line_comment",
    )

    comment = ""
    best_count = 0
    for group in comment_groups:
        if group not in data:
            continue

        for item in data[group]:
            text = item['comment']
            lowered = text.lower()

            # An explicit SPDX tag is decisive: stop searching right away.
            if 'spdx-license-identifier' in lowered:
                return text

            count = sum(1 for keyword in match_list if keyword in lowered)

            # Strictly greater, so the earliest best-scoring comment is kept.
            if count > best_count:
                best_count = count
                comment = text

    return comment


class CommentPreprocessor(object):
Expand Down Expand Up @@ -114,17 +118,15 @@ def extract(inputFile):
:return: Temp file path from the OS
'''

supportedFileExtensions = ['.py','.m4','.nsi','.c','.h','.cs','.cpp','.sep','.hxx','.cc','.css','.go','.hs','.html',
'.xml','.java','.js','.kt','.kts','.ktm','.m','.php','.pl','.r','.R','.rb','.rs','.sh','.swift','.scala',
'.sc','.txt','.lic','.install','.OSS','.gl']

supportedFileExtensions = list(LanguageMapper.LANG_MAP.keys())

fd, outputFile = tempfile.mkstemp()
fileType = os.path.splitext(inputFile)[1]

with open(outputFile, 'w') as outFile:
# if the file extension is supported
if fileType in supportedFileExtensions:
data_file = extract(inputFile)
data_file = commentExtract(inputFile)
data = json.loads(data_file)
data1 = licenseComment(data)
outFile.write(data1)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ requires = [
"scipy>=0.18.1",
"textdistance>=3.0.3",
"pyxDamerauLevenshtein>=1.5",
"nirjas>=0.0.3",
"nirjas>=0.0.5",
"urllib3>=1.24.1"
]
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ scipy>=0.18.1
spacy>=2.0.11
textdistance>=3.0.3
setuptools>=39.2.0
nirjas>=0.0.3
urllib3>=1.24.1
nirjas>=0.0.5
urllib3>=1.24.1
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def read(fname):
'tqdm>=4.23.4',
'pandas>=0.23.1',
'urllib3>=1.24.1',
'nirjas>=0.0.3'
'nirjas>=0.0.5'
]

requirements = [
Expand All @@ -68,7 +68,7 @@ def read(fname):
'textdistance>=3.0.3',
'pyxDamerauLevenshtein>=1.5',
'urllib3>=1.24.1',
'nirjas>=0.0.3'
'nirjas>=0.0.5'
]

class BuildAtarashiDependencies(distutils.cmd.Command):
Expand Down

0 comments on commit 89476ad

Please sign in to comment.