Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 53 additions & 28 deletions Finder/gitfinder.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,49 @@
#!/usr/bin/python
import sys, os, argparse
from urllib.request import urlopen
#!/usr/bin/env python3

'''
Finder is part of https://github.com/internetwache/GitTools

Developed and maintained by @gehaxelt from @internetwache

Use at your own risk. Usage might be illegal in certain circumstances.
Only for educational purposes!
'''

import argparse
import sys
from functools import partial
from http.client import HTTPException
from multiprocessing import Pool
from urllib.error import HTTPError
from urllib.request import urlopen


def findgitrepo(output_file, domains):
    """Probe a single host for a publicly readable ``.git`` directory.

    Requests ``http://<host>/.git/HEAD`` and treats the host as a match when
    the first 200 bytes of the response contain ``refs/heads``. A match is
    appended to *output_file* (one host per line) and reported on stdout.

    The function is a best-effort probe meant to run inside a worker pool:
    any network, protocol or decoding failure simply means "no match" and
    never propagates to the caller.

    Args:
        output_file: Path of the file that matching hosts are appended to.
        domains: One host name (typically one raw line of the input file).
            Surrounding whitespace, including the trailing newline, is
            stripped. The plural name is kept for interface compatibility.

    Returns:
        None in every case.
    """
    domain = domains.strip()
    if not domain:
        # A blank input line would otherwise be requested as "http:///.git/HEAD".
        return

    try:
        # Try to download http://target.tld/.git/HEAD
        with urlopen('http://' + domain + '/.git/HEAD', timeout=5) as response:
            answer = response.read(200).decode()
    except (OSError, HTTPException, ValueError):
        # OSError covers HTTPError, URLError (DNS failure, refused connection),
        # socket timeouts and resets; HTTPException covers malformed hosts and
        # broken responses; ValueError covers bodies that are not valid UTF-8.
        return

    # Check if refs/heads is in the file
    if 'refs/heads' not in answer:
        return

    # Write match to output_file
    with open(output_file, 'a') as file_handle:
        file_handle.write(domain + '\n')

    print('[*] Found: ' + domain)

def read_file(filename):
    """Return all lines of *filename* as a list of strings.

    Line terminators are kept, so callers are expected to strip each entry
    themselves. The file is opened in text mode with the default encoding.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one string per line; empty if the file is empty.

    Raises:
        FileNotFoundError: If *filename* does not exist (raised by ``open``).
    """
    with open(filename) as file_handle:
        return file_handle.readlines()

def main():
    """Command-line entry point: scan a list of hosts for exposed ``.git`` dirs.

    Prints the banner, parses ``-i/--inputfile``, ``-o/--outputfile`` and
    ``-t/--threads``, reads the host list and fans the hosts out to a pool of
    worker processes, each of which runs ``findgitrepo`` on one host.

    Exits via ``sys.exit`` with the error message when the thread count is
    not an integer or the input file does not exist.
    """
    print("""
###########
# Finder is part of https://github.com/internetwache/GitTools
#
# Developed and maintained by @gehaxelt from @internetwache
#
# Use at your own risk. Usage might be illegal in certain circumstances.
# Only for educational purposes!
###########
""")

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--inputfile', default='input.txt', help='input file')
    parser.add_argument('-o', '--outputfile', default='output.txt', help='output file')
    parser.add_argument('-t', '--threads', default=200, help='threads')
    args = parser.parse_args()

    domain_file = args.inputfile
    output_file = args.outputfile
    try:
        max_processes = int(args.threads)
    except ValueError as err:
        sys.exit(err)

    try:
        domains = read_file(domain_file)
    except FileNotFoundError as err:
        sys.exit(err)

    fun = partial(findgitrepo, output_file)
    print("Scanning...")
    with Pool(processes=max_processes) as pool:
        # imap_unordered is lazy and leaving the ``with`` block terminates the
        # pool, so the iterator must be drained here or the workers are killed
        # before the scan completes. The results themselves are always None.
        for _ in pool.imap_unordered(fun, domains):
            pass
    print("Finished")


if __name__ == '__main__':
    main()
24 changes: 17 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# GitTools

This repository contains three small python/bash scripts used for the Git research. [Read about it here](http://en.internetwache.org/dont-publicly-expose-git-or-how-we-downloaded-your-websites-sourcecode-an-analysis-of-alexas-1m-28-07-2015/)
This repository contains three small Python/Bash scripts used for our Git research. [Read about it here](https://en.internetwache.org/dont-publicly-expose-git-or-how-we-downloaded-your-websites-sourcecode-an-analysis-of-alexas-1m-28-07-2015/)

## Finder

Expand All @@ -16,7 +16,17 @@ This python script identifies websites with publicly accessible ```.git``` repos
It checks if the ```.git/HEAD``` file contains ```refs/heads```.

```
./gitfinder.py -h
$ ./gitfinder.py -h

###########
# Finder is part of https://github.com/internetwache/GitTools
#
# Developed and maintained by @gehaxelt from @internetwache
#
# Use at your own risk. Usage might be illegal in certain circumstances.
# Only for educational purposes!
###########

usage: gitfinder.py [-h] [-i INPUTFILE] [-o OUTPUTFILE] [-t THREADS]

optional arguments:
Expand All @@ -38,7 +48,7 @@ The script will output discovered domains in the form of ```[*] Found: DOMAIN```
wget http://s3.amazonaws.com/alexa-static/top-1m.csv.zip
unzip top-1m.csv.zip
sed -i.bak 's/.*,//' top-1m.csv
python3 ./gitfinder.py -i top-1m.csv
./gitfinder.py -i top-1m.csv
```

## Dumper
Expand All @@ -48,7 +58,7 @@ This tool can be used to download as much as possible from the found .git reposi
### Usage

```
./gitdumper.sh -h
$ ./gitdumper.sh -h

[*] USAGE: http://target.tld/.git/ dest-dir [--git-dir=otherdir]
--git-dir=otherdir Change the git folder name. Default: .git
Expand All @@ -71,7 +81,7 @@ This script tries to recover incomplete git repositories:
### Usage

```
./extractor.sh /tmp/mygitrepo /tmp/mygitrepodump
$ ./extractor.sh /tmp/mygitrepo /tmp/mygitrepodump
```
where
- ```/tmp/mygitrepo``` contains a ```.git``` directory
Expand All @@ -89,11 +99,11 @@ Here's a small demo of the **Dumper** tool:

## Requirements
* git
* python
* Python 3+
* curl
* bash
* sed

# License

All tools are licensed using the MIT license. See LICENSE.md
All tools are licensed using the MIT license. See [LICENSE.md](LICENSE.md)