-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bypass request delay when request is cancelled #619
base: master
Are you sure you want to change the base?
Changes from 6 commits
7ed6592
cd5d8db
842b7e7
191b0b7
6c602af
e0c3c8b
a1e4bc8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
package colly | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"math/rand" | ||
"net/http" | ||
"net/http/cookiejar" | ||
"net/http/httptest" | ||
"strconv" | ||
"strings" | ||
"sync" | ||
"testing" | ||
"time" | ||
) | ||
|
||
func TestHTTPBackendDoCancelation(t *testing.T) { | ||
rand.Seed(time.Now().Unix()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's better to not have any indeterminate behaviour in tests. Either just make make There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Made a commit with a hardcoded seed. :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd still advise you to use |
||
|
||
// rand up to 10 to not extend the test duration too much | ||
p := 1 + rand.Intn(5) // p: parallel requests | ||
n := p + p*rand.Intn(10) // n: after n, cancel will be called; ensure 1 calls per worker + rand | ||
c := n + p*2 + rand.Intn(10) // c: total number of calls; ensure 2 calls per worker after cancel is called + rand | ||
|
||
ts := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { | ||
fmt.Fprint(rw, "OK") | ||
})) | ||
defer ts.Close() | ||
|
||
checkHeadersFunc := func(req *http.Request, statusCode int, header http.Header) bool { return true } | ||
|
||
ctx, cancel := context.WithCancel(context.Background()) | ||
defer cancel() | ||
|
||
backend := &httpBackend{} | ||
jar, _ := cookiejar.New(nil) | ||
backend.Init(jar) | ||
limit := &LimitRule{ | ||
DomainRegexp: ".*", | ||
Parallelism: p, | ||
Delay: 5 * time.Millisecond, | ||
} | ||
backend.Limit(limit) | ||
|
||
var wg sync.WaitGroup | ||
wg.Add(c) | ||
|
||
out := make(chan error) | ||
|
||
for i := 0; i < c; i++ { | ||
go func(i int) { | ||
defer wg.Done() | ||
req, _ := http.NewRequest("GET", ts.URL+"/"+strconv.Itoa(i), nil) | ||
req = req.WithContext(ctx) | ||
|
||
_, err := backend.Do(req, 0, checkHeadersFunc) | ||
out <- err | ||
}(i) | ||
} | ||
|
||
go func() { | ||
wg.Wait() | ||
close(out) | ||
}() | ||
|
||
i := 0 | ||
nonEarlyCount := 0 | ||
for err := range out { | ||
i++ | ||
if i == n { | ||
cancel() | ||
} | ||
|
||
if i <= n { | ||
if err != nil { | ||
t.Errorf("no error was expected for the first %d responses; error: %q", n, err) | ||
} | ||
} else { | ||
errStr := "" | ||
if err != nil { | ||
errStr = err.Error() | ||
} | ||
|
||
// non early returns are allowed up to the number of maximum allowed concurrent requests; | ||
// bacause those requests could be already running when cancel was called | ||
if !strings.Contains(errStr, "early return") { | ||
if nonEarlyCount > p { | ||
t.Errorf("count of non early return is above the number of maximum allowed concurrent requests; p: %d; n: %d; c: %d", p, n, c) | ||
} | ||
nonEarlyCount++ | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe an earlier version of this PR returned `ctx.Err()` instead. Why did you decide to return a new error?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that was the case. Usually I would use `Wrap` from `github.com/pkg/errors` and just add context to the error, but that library is not in use here.
My first attempt at the test asserted on the time it took to run all the calls, checking that they returned as soon as possible after the cancel. It worked fine locally but failed on CI; a timing check like that is really unreliable.
So my second attempt is based on the error message. To tell whether the error was the normal error from the request or the early return, I had to make the early-return message different from `ctx.Err()`, because that is what we get from the HTTP client when it tries to run a cancelled request.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for the late response. I don't think tests are a good enough reason to return different errors (depending on where it happened) when the context is cancelled. Are you aware of any other library that does not return `ctx.Err()` unchanged?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No worries, any suggestions on how to get the test done?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suppose you could make a test like this:
What do you think?