Skip to content

Commit

Permalink
Merge pull request #163 from projectdiscovery/dev
Browse files Browse the repository at this point in the history
v0.0.2
  • Loading branch information
ehsandeep committed Nov 11, 2022
2 parents 22fa3fe + ea37eee commit 4301a61
Show file tree
Hide file tree
Showing 14 changed files with 244 additions and 26 deletions.
19 changes: 13 additions & 6 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
name: 🔨 Build Test

on:
push:
pull_request:
workflow_dispatch:
pull_request:
branches:
- dev

jobs:
build:
Expand All @@ -11,6 +13,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]

steps:
- name: Set up Go
uses: actions/setup-go@v3
Expand All @@ -20,14 +23,18 @@ jobs:
- name: Check out code
uses: actions/checkout@v3

- name: Test
run: go test ./...
working-directory: .

- name: Build
run: go build .
working-directory: cmd/katana/

- name: Test
run: go test ./...
working-directory: .
- name: Install
run: go install
working-directory: cmd/katana/

- name: Race Condition Tests
run: go build -race .
working-directory: cmd/katana/
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ name: 🚨 CodeQL Analysis

on:
workflow_dispatch:
push:
pull_request:
branches:
- dev
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/lint-test.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
name: 🙏🏻 Lint Test

on:
push:
pull_request:
workflow_dispatch:
pull_request:
branches:
- dev

jobs:
lint:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/release-binary.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
name: 🎉 Release Binary

on:
create:
tags:
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
FROM golang:1.19.2-alpine AS builder
FROM golang:1.19.3-alpine AS builder
RUN apk add --no-cache git
RUN go install -v github.com/projectdiscovery/katana/cmd/katana@latest

FROM alpine:3.16.2
RUN apk -U upgrade --no-cache \
&& apk add --no-cache bind-tools ca-certificates
&& apk add --no-cache bind-tools ca-certificates chromium
COPY --from=builder /go/bin/katana /usr/local/bin/

ENTRYPOINT ["katana"]
87 changes: 78 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,32 @@ katana requires **Go 1.18** to install successfully. To install, just run the be
go install github.com/projectdiscovery/katana/cmd/katana@latest
```

**More options to install / run katana-**

<details>
<summary>Docker</summary>

> To install / update docker to latest tag -
```sh
docker pull projectdiscovery/katana:latest
```

> To run katana in standard mode using docker -

```sh
docker run projectdiscovery/katana:latest -u https://tesla.com
```

> To run katana in headless mode using docker -
```sh
docker run projectdiscovery/katana:latest -u https://tesla.com -system-chrome -headless
```

</details>

## Usage

```console
Expand Down Expand Up @@ -77,9 +103,11 @@ CONFIGURATION:
-fc, -form-config string path to custom form configuration file

HEADLESS:
-hl, -headless enable headless hybrid crawling (experimental)
-sc, -system-chrome use local installed chrome browser instead of katana installed
-sb, -show-browser show the browser on the screen with headless mode
-hl, -headless enable headless hybrid crawling (experimental)
-sc, -system-chrome use local installed chrome browser instead of katana installed
-sb, -show-browser show the browser on the screen with headless mode
-ho, -headless-options string[] start headless chrome with additional options
-nos, -no-sandbox start headless chrome in --no-sandbox mode

SCOPE:
-cs, -crawl-scope string[] in scope url regex to be followed by crawler
Expand Down Expand Up @@ -226,8 +254,30 @@ HEADLESS:
-hl, -headless enable experimental headless hybrid crawling
-sc, -system-chrome use local installed chrome browser instead of katana installed
-sb, -show-browser show the browser on the screen with headless mode
-ho, -headless-options string[] start headless chrome with additional options
-nos, -no-sandbox start headless chrome in --no-sandbox mode
```

*`-no-sandbox`*
----

Runs headless chrome browser with **no-sandbox** option, useful when running as root user.

```console
katana -u https://tesla.com -headless -no-sandbox
```

*`-headless-options`*
----

When crawling in headless mode, additional chrome options can be specified using `-headless-options`, for example -


```console
katana -u https://tesla.com -headless -system-chrome -headless-options --disable-gpu,proxy-server=http://127.0.0.1:8080
```


## Scope Control

Crawling can be endless if not scoped; as such, katana comes with multiple options to define the crawl scope.
Expand All @@ -240,7 +290,7 @@ Most handy option to define scope with predefined field name, `rdn` being defaul
- `fqdn` - crawling scoped to given sub(domain)
- `dn` - crawling scoped to domain name keyword

```
```console
katana -u https://tesla.com -fs dn
```

Expand All @@ -250,7 +300,7 @@ katana -u https://tesla.com -fs dn

For advanced scope control, `-cs` option can be used that comes with **regex** support.

```
```console
katana -u https://tesla.com -cs login
```

Expand All @@ -265,7 +315,7 @@ app/
wordpress/
```

```
```console
katana -u https://tesla.com -cs in_scope.txt
```

Expand All @@ -275,7 +325,7 @@ katana -u https://tesla.com -cs in_scope.txt

For defining what not to crawl, `-cos` option can be used and also support **regex** input.

```
```console
katana -u https://tesla.com -cos logout
```

Expand All @@ -288,7 +338,7 @@ $ cat out_of_scope.txt
/log_out
```

```
```console
katana -u https://tesla.com -cos out_of_scope.txt
```

Expand All @@ -297,7 +347,7 @@ katana -u https://tesla.com -cos out_of_scope.txt

Katana defaults to the scope `*.domain`; to disable this, the `-ns` option can be used, which also allows crawling the internet.

```
```console
katana -u https://tesla.com -ns
```

Expand Down Expand Up @@ -475,6 +525,25 @@ https_www.tesla.com_qurl.txt
</tr>
</table>


*`-extension-match`*
---

Crawl output can be easily matched for specific extensions using the `-em` option, which ensures that only output containing the given extensions is displayed.

```
katana -u https://tesla.com -silent -em js,jsp,json
```

*`-extension-filter`*
---

Crawl output can be easily filtered for specific extensions using the `-ef` option, which ensures that all URLs containing the given extensions are removed.

```
katana -u https://tesla.com -silent -ef css,txt,md
```

Here are additional filter options -

```console
Expand Down
8 changes: 7 additions & 1 deletion cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ func main() {

runner, err := runner.New(options)
if err != nil || runner == nil {
gologger.Fatal().Msgf("could not create runner: %s\n", err)
if options.Version {
return
} else {
gologger.Fatal().Msgf("could not create runner: %s\n", err)
}
}
defer runner.Close()

Expand Down Expand Up @@ -77,6 +81,8 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.BoolVarP(&options.Headless, "headless", "hl", false, "enable headless hybrid crawling (experimental)"),
flagSet.BoolVarP(&options.UseInstalledChrome, "system-chrome", "sc", false, "use local installed chrome browser instead of katana installed"),
flagSet.BoolVarP(&options.ShowBrowser, "show-browser", "sb", false, "show the browser on the screen with headless mode"),
flagSet.StringSliceVarP(&options.HeadlessOptionalArguments, "headless-options", "ho", nil, "start headless chrome with additional options", goflags.FileCommaSeparatedStringSliceOptions),
flagSet.BoolVarP(&options.HeadlessNoSandbox, "no-sandbox", "nos", false, "start headless chrome in --no-sandbox mode"),
)

flagSet.CreateGroup("scope", "Scope",
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ require (
github.com/shirou/gopsutil/v3 v3.22.10
github.com/stretchr/testify v1.8.1
go.uber.org/multierr v1.8.0
golang.org/x/net v0.1.0
golang.org/x/net v0.2.0
gopkg.in/yaml.v3 v3.0.1
)

Expand Down Expand Up @@ -68,7 +68,7 @@ require (
go.uber.org/atomic v1.9.0 // indirect
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 // indirect
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
golang.org/x/sys v0.1.0 // indirect
golang.org/x/sys v0.2.0 // indirect
golang.org/x/text v0.4.0 // indirect
golang.org/x/tools v0.1.12 // indirect
)
7 changes: 5 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,9 @@ golang.org/x/net v0.0.0-20220617184016-355a448f1bc9/go.mod h1:XRhObCWvk6IyKnWLug
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.0.0-20220728211354-c7608f3a8462/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0=
golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco=
golang.org/x/net v0.2.0 h1:sZfSu1wtKLGlWI4ZZayP0ck9Y73K1ynO6gqzTdBVdPU=
golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
Expand Down Expand Up @@ -283,12 +284,14 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220731174439-a90be440212d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
Expand Down
2 changes: 1 addition & 1 deletion internal/runner/banner.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ var banner = fmt.Sprintf(`
/_/\_\\_,_/\__/\_,_/_//_/\_,_/ %s
`, version)

var version = "v0.0.1"
var version = "v0.0.2"

// showBanner is used to show the banner to the user
func showBanner() {
Expand Down
3 changes: 3 additions & 0 deletions internal/runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ func validateOptions(options *types.Options) error {
if len(options.URLs) == 0 && !fileutil.HasStdin() {
return errors.New("no inputs specified for crawler")
}
if (options.HeadlessOptionalArguments != nil || options.HeadlessNoSandbox) && !options.Headless {
return errors.New("headless mode (-hl) is required if -ho or -nos are set")
}
gologger.DefaultLogger.SetFormatter(formatter.NewCLI(options.NoColors))
return nil
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/engine/hybrid/hybrid.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/PuerkitoBio/goquery"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/launcher/flags"
"github.com/pkg/errors"
"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/katana/pkg/engine/common"
Expand Down Expand Up @@ -76,6 +77,14 @@ func New(options *types.CrawlerOptions) (*Crawler, error) {
chromeLauncher = chromeLauncher.Headless(true)
}

if options.Options.HeadlessNoSandbox {
chromeLauncher.Set("no-sandbox", "true")
}

for k, v := range options.Options.ParseHeadlessOptionalArguments() {
chromeLauncher.Set(flags.Flag(k), v)
}

launcherURL, err := chromeLauncher.Launch()
if err != nil {
return nil, err
Expand Down
18 changes: 18 additions & 0 deletions pkg/types/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ type Options struct {
UseInstalledChrome bool
// ShowBrowser specifies whether the show the browser in headless mode
ShowBrowser bool
// HeadlessOptionalArguments specifies optional arguments to pass to Chrome
HeadlessOptionalArguments goflags.StringSlice
// HeadlessNoSandbox specifies if chrome should be start in --no-sandbox mode
HeadlessNoSandbox bool
}

func (options *Options) ParseCustomHeaders() map[string]string {
Expand All @@ -90,3 +94,17 @@ func (options *Options) ParseCustomHeaders() map[string]string {
}
return customHeaders
}

// ParseHeadlessOptionalArguments converts the raw headless option entries
// stored in HeadlessOptionalArguments into a map of flag name to flag value.
//
// Entries of the form "key=value" are split on the first "=" and both sides
// are whitespace-trimmed; such an entry is ignored when either the key or the
// value is empty. Entries without "=" (value-less switches such as
// "--disable-gpu") are kept with an empty value instead of being silently
// discarded. Blank entries are skipped. When a key occurs more than once the
// last occurrence wins.
//
// The returned map is never nil.
func (options *Options) ParseHeadlessOptionalArguments() map[string]string {
	optionalArguments := make(map[string]string, len(options.HeadlessOptionalArguments))
	for _, v := range options.HeadlessOptionalArguments {
		arg := strings.TrimSpace(v)
		if arg == "" {
			continue
		}
		key, value, hasValue := strings.Cut(arg, "=")
		if !hasValue {
			// Boolean switch: there is no value to forward, but the flag
			// itself must not be lost.
			optionalArguments[arg] = ""
			continue
		}
		key = strings.TrimSpace(key)
		value = strings.TrimSpace(value)
		if key != "" && value != "" {
			optionalArguments[key] = value
		}
	}
	return optionalArguments
}

0 comments on commit 4301a61

Please sign in to comment.