--变化太少--在各种语言的源代码中查找重复项

tlv的Python项目详细描述


太少的变化-一个发现各种代码重复的工具 语言

目的

在许多文件中很难跟踪代码复制。有工具 存在似 CPD(来自pmd) 它们做得很好,但它们需要Java(甚至更多 (晦涩的语言)奔跑。所以我决定写一个小工具 Pygments和python difflib要捕获 尽可能多语言的代码复制

要求

安装

pip install tlv

用法

usage: tlv [-h][--minlines MINLINES][--mintoken MINTOKEN][-j JOBS][--lexer {...}][--wildcard_token {...}][--verbose][--nodetails]
           files [files ...]

Find code duplications across various languages
positional arguments:
  files                 Files to parse

optional arguments:
  -h, --help            show this help message and exit
  --minlines MINLINES   Minimum length of block in lines before reporting
  --mintoken MINTOKEN   Minimum length of block in token before reporting
  -j JOBS, --jobs JOBS  Number of jobs to run in parallel
  --lexer {3d,aap,actionscript,actionscript3,ada,ada2005,ada95,adl,agda,ahk,al,alloy,amienttalk,amienttalk/2,ampl,an,antlr,antlr-actionscript,antlr-as,antlr-c#,antlr-cpp,antlr-csharp,antlr-java,antlr-perl,antlr-python,antlr-r,antlr-ruy,apachecon,apl,applescript,arduino,arexx,as,as3,asemake,asic,asm,aspectj,aspx-cs,asy,asymptote,at,atch,augeas,autohotkey,autoit,awk,c,c#,c++,c++-ojdum,c-ojdump,ca65,cadl,camkes,capdl,capnp,casic,ceylon,chai,chaiscript,chapel,charmci,cheetah,chpl,cirru,cl,clay,clean,clj,cljs,clojure,clojurescript,cmake,cmas,co,code,common-lisp,componentpascal,console,control,cool,coq,cp,cpp,cpp-ojdump,cpsa,cr,crmsh,croc,cry,cryptol,crystal,csh,csharp,csound,csound-csd,csound-document,csound-orc,csound-sco,csound-score,css,css+django,css+er,css+genshi,css+genshitext,css+jinja,css+lasso,css+mako,css+mozpreproc,css+php,css+ruy,css+smarty,cu,cucumer,cuda,cxx-ojdump,cypher,cython,d,d-ojdump,dart,dasm16,decontrol,delphi,desources,dg,di,django,do,docker,dosatch,doscon,dpatch,dtd,duy,dylan,dylan-console,dylan-lid,dylan-repl,e,earl-grey,earlgrey,easytrieve,ec,ecl,eg,ei,elisp,elixir,elm,emacs,emacs-lisp,en,er,erl,erlang,evoque,ex,exs,extempore,ezhil,gap,gas,gawk,genshi,genshitext,gherkin,glsl,gnuplot,go,golo,gooddata-cl,gosu,gro,groovy,gst,haml,handlears,haskell,haxe,haxeml,hexdump,hlsl,hs,hsa,hsail,hspec,html,html+cheetah,html+django,html+er,html+evoque,html+genshi,html+handlears,html+jinja,html+kid,html+lasso,html+mako,html+myghty,html+ng2,html+php,html+ruy,html+smarty,html+spit,html+twig,htmldjango,http,hx,hxml,hxsl,hylang,i,i6t,idl,idl4,idr,idris,iex,igor,igorpro,ik,in,ini,io,ioke,ir,irc,isaelle,itex,j,jade,jags,jasmin,jasminxt,java,javascript,javascript+cheetah,javascript+django,javascript+er,javascript+genshi,javascript+genshitext,javascript+jinja,javascript+lasso,javascript+mako,javascript+myghty,javascript+php,javascript+ruy,javascript+smarty,jcl,jinja,jl,jlcon,jproperties,js,js+cheetah,js+django,js+er,js+genshi,js+genshitext,js+jinja,js+lasso,js+mako,js+myghty,js+php,js+ruy,js+smarty,js+spit,jsg,json,json-ld,json-oject,jsonld,jsp,julia,juttle,kal,kcon,kid,koka,kotlin,lagda,lasso,lassoscript,lcry,lcryptol,lean,less,lhaskell,lhs,lid,lidr,lidris,lighttpd,lighty,limo,liquid,lisp,literate-agda,literate-cryptol,literate-haskell,literate-idris,litzasic,litzmax,live-script,livescript,llvm,logos,logtalk,lua,m2,make,make,mako,maql,mask,mason,mathematica,matla,matlasession,mawk,max,md,minid,mma,modelica,modula2,moin,monkey,monte,moo,moocode,moon,moonscript,mq4,mq5,mql,mql4,mql5,msc,mscgen,mupad,mxml,myghty,mysql,n,nasm,nawk,ncl,nesc,newlisp,newspeak,ng2,nginx,nim,nimrod,nit,nix,nixos,nn,nsh,nsi,nsis,numpy,nusmv,oa,ocaml,octave,odin,oj-c,oj-c++,oj-j,ojc,ojc++,ojdump,ojdump-nasm,ojective-c,ojective-c++,ojective-j,ojectivec,ojectivec++,ojectivej,ojectpascal,ojj,oo,ooc,oogie,opa,openedge,openugs,pacmancon,pan,parasail,pas,pascal,pawn,pcmk,perl,perl6,php,php3,php4,php5,pig,pike,pkgcon,pl,pl6,plpgsql,plus,po,posh,postgres,postgres-console,postgresql,postgresql-console,postscr,postscript,pot,pov,powershell,praat,progress,prolog,properties,protou,ps1,ps1con,psm1,psql,pug,puppet,py,py3,py3t,pycon,pypy,pypylog,pyrex,pyt,python,python3,pyx,qasic,qml,qs,qvt,qvto,r,racket,ragel,ragel-c,ragel-cpp,ragel-d,ragel-em,ragel-java,ragel-ojc,ragel-r,ragel-ruy,rain,raw,rcon,rd,red,red/system,redcode,registry,reol,resource,resourceundle,rexx,rhtml,rkt,rnc,rng-compact,ro,roocon,root,rql,rs,rsl,rts,rust,ruy,s,sage,salt,sarl,sas,sass,satch,sc,scala,scaml,scheme,scila,scm,scss,shell-session,silver,slash,slim,sls,slurm,smali,smalltalk,smarty,sml,snool,sources.list,sourceslist,sp,sparql,spec,spit,splus,sql,sqlite3,squeak,squidcon,st,st-pytex,stan,stata,supercollider,sv,swi,swig,systemverilog,t-sql,tads3,tap,tasm,tcl,tcsh,tcshcon,tea,teraterm,teratermmacro,termcap,termin,terra,thri,todotxt,toml,tra,trac-wiki,treetop,ts,tsql,ttl,turtle,twig,typescript,typoscript,typoscriptcssdata,typoscripthtmldata,ucode,ugs,unicon,uriscript,v.net,vala,vapi,vcl,vclsnippet,vclsnippets,vctreestatus,velocity,vgl,vhdl,vim,vnet,vscript,wdi,whiley,winatch,winugs,x10,xml,xml+cheetah,xml+django,xml+er,xml+evoque,xml+genshi,xml+jinja,xml+kid,xml+lasso,xml+mako,xml+myghty,xml+php,xml+ruy,xml+smarty,xml+spit,xorg,xq,xql,xqm,xquery,xqy,xten,xtend,yaml,yaml+jinja}
                        Manually set a lexer to use on all files
  --wildcard_token {Token.Keyword,Token.Keyword.Constant,Token.Keyword.Declaration,Token.Keyword.Namespace,Token.Keyword.Pseudo,Token.Keyword.Reserved,Token.Keyword.Type,Token.Name,Token.Name.Attribute,Token.Name.Builtin,Token.Name.Builtin.Pseudo,Token.Name.Class,Token.Name.Constant,Token.Name.Decorator,Token.Name.Entity,Token.Name.Exception,Token.Name.Function,Token.Name.Function.Magic,Token.Name.Label,Token.Name.Namespace,Token.Name.Other,Token.Name.Tag,Token.Name.Variable,Token.Name.Variable.Class,Token.Name.Variable.Global,Token.Name.Variable.Instance,Token.Name.Variable.Magic,Token.Literal,Token.Literal.Date,Token.Literal.String,Token.Literal.String.Affix,Token.Literal.String.Backtick,Token.Literal.String.Char,Token.Literal.String.Delimiter,Token.Literal.String.Doc,Token.Literal.String.Double,Token.Literal.String.Escape,Token.Literal.String.Heredoc,Token.Literal.String.Interpol,Token.Literal.String.Other,Token.Literal.String.Regex,Token.Literal.String.Single,Token.Literal.String.Symbol,Token.Literal.Number,Token.Literal.Number.Bin,Token.Literal.Number.Float,Token.Literal.Number.Hex,Token.Literal.Number.Integer,Token.Literal.Number.Integer.Long,Token.Literal.Number.Oct,Token.Operator,Token.Operator.Word,Token.Punctuation,Token.Comment,Token.Comment.Hashbang,Token.Comment.Multiline,Token.Comment.Preproc,Token.Comment.Single,Token.Comment.Special,Token.Generic,Token.Generic.Deleted,Token.Generic.Emph,Token.Generic.Error,Token.Generic.Heading,Token.Generic.Inserted,Token.Generic.Output,Token.Generic.Prompt,Token.Generic.Strong,Token.Generic.Subheading,Token.Generic.Traceback,Token.Text.Whitespace}
                        Token types that are threated as wildcards (actual
                        value doesn't matter)
  --verbose             Verbose output
  --nodetails           Dump the details of a finding

默认情况下,工具根据文件名猜测内容类型,如果 对您不起作用,请参见下面的

指定lexer

通过使用–lexer=运行该工具,可以使用特定的lexer。 选择。执行此操作时,所有输入文件都由指定的 莱克瑟。因此,使用此选项时,应确保所有传递的文件 属于指定的文件,否则结果可能是假的。

输出

输出将写入stdout。有两种可能的查找类型

  • 重复-代码完全相同
  • toolessvariation-删除所有标记时,代码相同 由–通配符标记指定的类型

输出示例

/someplace/busybox/modutils/modutils.c:9:0:[TooLessVariation]:Block till 16:0 is nearly the same as in ../modprobe-small.c from 29:0 till 36:0
>>> #include <sys/syscall.h>
>>>
>>> #define init_module(mod, len, opts) syscall(__NR_init_module, mod, len, opts)
>>> #if defined(__NR_finit_module)
>>> # define finit_module(fd, uargs, flags) syscall(__NR_finit_module, fd, uargs, flags)
>>> #endif
>>> #define delete_module(mod, flags) syscall(__NR_delete_module, mod, flags)
>>>
<<<
>>> #include <sys/syscall.h>
>>>
>>> #define init_module(mod, len, opts) syscall(__NR_init_module, mod, len, opts)
>>> #define delete_module(mod, flags) syscall(__NR_delete_module, mod, flags)
>>> #ifdef __NR_finit_module
>>> # define finit_module(fd, uargs, flags) syscall(__NR_finit_module, fd, uargs, flags)
>>> #endif
>>>
<<<
/someplace/busybox/modutils/modutils.c:165:32:[Duplicate]:Block till 169:3 is the same as in ../modprobe-small.c from 324:18 till 327:0
>>>
>>>     fstat(fd, &st);
>>>     image = NULL;
>>>     /* st.st_size is off_t, we can't just pass it to mmap */
>>>     if
<<<
/someplace/busybox/modutils/modprobe-small.c:236:15:[Duplicate]:Block till 240:10 is the same as in ../modutils.c from 265:26 till 269:10
>>>
>>>     case ENOEXEC:
>>>         return "invalid module format";
>>>     case ENOENT:
>>>         return "
<<<
/someplace/busybox/modutils/modutils.c:115:0:[Duplicate]:Block till 130:43 is the same as in ../modprobe-small.c from 177:0 till 194:36
>>>
>>> #if ENABLE_FEATURE_CMDLINE_MODULE_OPTIONS
>>> char* FAST_FUNC parse_cmdline_module_options(char **argv, int quote_spaces)
>>> {
>>>     char *options;
>>>     int optlen;
>>>
>>>     options = xzalloc(1);
>>>     optlen = 0;
>>>     while (*++argv) {
>>>         const char *fmt;
>>>         const char *var;
>>>         const char *val;
>>>
>>>         var = *argv;
>>>         options = xrealloc(options, optlen + 2 +
<<<

如果不想看到代码本身,请传递–nodetails 选项

进一步阅读

错误和贡献

可以随意创建问题或拉取请求

欢迎加入QQ群-->: 979659372 Python中文网_新手群

推荐PyPI第三方库


热门话题
安卓软件包与java代码中的类型冲突   谷歌应用引擎Java还是Python?   如何将java bean传递到jsp页面,以便jqQrid使用json显示?   在编译kotlin代码时,kotlin编译器如何处理java代码?   java不准确地更改JTextPane中的文本颜色   反应式编程AWS SDK v2 SdkAsyncHttpClient使用Java 11 Java实现。网http HttpClient sendAsync   在Spring AMQP中,java根据队列的消费者计数来消费队列   java在ArrayList的add()方法中创建新对象会导致内存泄漏。我能做什么不同的事?   未将java BufferedReader特定行追加到字符串   用于聊天程序格式化的java JavaFX 2文本区   java如何从netbeans项目生成exexutable文件?   swing如何在Java中使用JButton操作调整JWindow的宽度和高度?   java有没有办法在spring boot中使用jasypt aes加密和解密?   java通过使用泛型如何将映射作为集合传递给方法?   java如何替换不推荐使用的构造函数DynamoDBMapperFieldModel