Re: [PATCH] Replace HTTP links with HTTPS ones: Documentation/translations/it_IT

From: Alexander A. Klimov
Date: Thu Jun 11 2020 - 07:02:34 EST




On 11.06.20 at 12:40, Miguel Ojeda wrote:
On Thu, Jun 11, 2020 at 9:02 AM Alexander A. Klimov
<grandmaster@xxxxxxxxxxxx> wrote:

Are any of you familiar with Golang?

Don't worry about that! I'd expect seasoned C programmers to be able
to read Go (or similar languages) -- at least to have a general idea
of what an algorithm does.

It is not APL, after all :-)
Fine.


package main

import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"fmt"
	fatomic "github.com/natefinch/atomic"
	"golang.org/x/sync/semaphore"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"os/exec"
	"regexp"
	"runtime"
	"sync"
	"sync/atomic"
	"time"
)

// Limits how many files are processed concurrently.
var fileSemaphore = semaphore.NewWeighted(int64(runtime.NumCPU()) * 16)
var wg sync.WaitGroup

// Files currently being processed, for the (disabled) progress printer below.
var processingFiles = struct {
	sync.RWMutex

	files map[string]struct{}
}{
	files: map[string]struct{}{},
}

func main() {
	// List all files tracked by git, one NUL-terminated record per file.
	cmd := exec.Command("git", "ls-files", "-sz")
	cmd.Stderr = os.Stderr

	out, errSP := cmd.StdoutPipe()
	if errSP != nil {
		fmt.Fprintln(os.Stderr, errSP.Error())
		os.Exit(1)
	}

	if errSt := cmd.Start(); errSt != nil {
		fmt.Fprintln(os.Stderr, errSt.Error())
		os.Exit(1)
	}

	//go printProcessingFiles()

	buf := bufio.NewReader(out)
	for {
		// Each record looks like "<mode> <object> <stage>\t<path>".
		file, errRB := buf.ReadBytes(0)
		if errRB != nil && errRB != io.EOF {
			if errWt := cmd.Wait(); errWt != nil {
				fmt.Fprintln(os.Stderr, errWt.Error())
				wg.Wait()
				os.Exit(1)
			}

			fmt.Fprintln(os.Stderr, errRB.Error())
			wg.Wait()
			os.Exit(1)
		}

		// Only regular files (mode 100644/100755), not symlinks or submodules.
		if bytes.HasPrefix(file, []byte{'1', '0', '0'}) {
			if fields := bytes.SplitN(bytes.SplitN(file, []byte{0}, 2)[0], []byte{9}, 2); len(fields) == 2 {
				_ = fileSemaphore.Acquire(context.Background(), 1)
				wg.Add(1)
				go processFile(string(fields[1]))
			}
		}

		if errRB == io.EOF {
			break
		}
	}

	wg.Wait()

	if errWt := cmd.Wait(); errWt != nil {
		fmt.Fprintln(os.Stderr, errWt.Error())
		os.Exit(1)
	}
}

/*
func printProcessingFiles() {
	for {
		time.Sleep(time.Second)

		processingFiles.RLock()
		fmt.Fprintln(os.Stderr, processingFiles.files)
		processingFiles.RUnlock()
	}
}
*/

// Matches the http:// part of a link, stopping before '#', whitespace and
// trailing punctuation.
var httpLink = regexp.MustCompile(`\bhttp://[^# \t\r\n]*(?:\w|/)`)

// Lines declaring XML namespaces are left untouched.
var xmlns = regexp.MustCompile(`\bxmlns\b`)

func processFile(file string) error {
	defer fileSemaphore.Release(1)
	defer wg.Done()

	processingFiles.Lock()
	processingFiles.files[file] = struct{}{}
	processingFiles.Unlock()

	defer func() {
		processingFiles.Lock()
		delete(processingFiles.files, file)
		processingFiles.Unlock()
	}()

	content, errRF := ioutil.ReadFile(file)
	if errRF != nil {
		return errRF
	}

	lines := bytes.Split(content, []byte{'\n'})
	for i := range lines {
		if !xmlns.Match(lines[i]) {
			lines[i] = httpLink.ReplaceAllFunc(lines[i], processLink)
		}
	}

	// Rewrite the file atomically, but only if anything actually changed.
	if modified := bytes.Join(lines, []byte{'\n'}); !bytes.Equal(modified, content) {
		var buf bytes.Buffer
		buf.Write(modified)
		return fatomic.WriteFile(file, &buf)
	}

	return nil
}

// Per-link verification state: 0 = not checked yet, 1 = keep http://,
// 2 = switch to https://.
type linkOk struct {
	sync.Mutex

	ok uint32
}

// Cache of per-link results so that every distinct link is verified only once.
var links = map[string]*linkOk{}
var linksLock sync.RWMutex
var notsecure = http.Client{Timeout: 10 * time.Minute}
var secure = http.Client{Timeout: 10 * time.Minute, CheckRedirect: httpsRedirect}

func processLink(link []byte) []byte {
	linkStr := string(bytes.TrimPrefix(link, []byte("http://")))

	linksLock.RLock()
	lo, ok := links[linkStr]
	linksLock.RUnlock()

	if !ok {
		linksLock.Lock()

		lo, ok = links[linkStr]
		if !ok {
			lo = &linkOk{}
			links[linkStr] = lo
		}

		linksLock.Unlock()
	}

	for {
		switch atomic.LoadUint32(&lo.ok) {
		case 0:
			// Not verified yet; the first caller does so, concurrent
			// callers for the same link wait on the mutex.
			lo.Lock()

			if atomic.LoadUint32(&lo.ok) == 0 {
				if httpsAble(linkStr) {
					atomic.StoreUint32(&lo.ok, 2)
				} else {
					atomic.StoreUint32(&lo.ok, 1)
				}
			}

			lo.Unlock()

			continue
		case 2:
			return bytes.Replace(link, []byte("http://"), []byte("https://"), 1)
		default:
			return link
		}
	}
}

// A link is considered convertible only if both the https:// and the http://
// variant answer with 200 and serve identical bodies.
func httpsAble(link string) bool {
	resp, errGt := secure.Get("https://" + link)
	if errGt != nil {
		return false
	}

	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return false
	}

	resps, errGt := notsecure.Get("http://" + link)
	if errGt != nil {
		return false
	}

	defer resps.Body.Close()

	if resps.StatusCode != 200 {
		return false
	}

	var buf, bufs bytes.Buffer

	if _, errCp := io.Copy(&buf, resp.Body); errCp != nil {
		return false
	}

	if _, errCp := io.Copy(&bufs, resps.Body); errCp != nil {
		return false
	}

	return bytes.Equal(buf.Bytes(), bufs.Bytes())
}

var insecure = errors.New("insecure")

// Refuse to follow redirects to anything other than https://.
func httpsRedirect(req *http.Request, _ []*http.Request) error {
	switch req.URL.Scheme {
	case "", "https":
		return nil
	}

	return insecure
}
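
In case the two regexps (httpLink and xmlns) are not obvious at first glance,
here is a tiny standalone demonstration of what they do and don't pick up. The
sample lines are made up, not taken from the tree, and the real program
additionally verifies every candidate via httpsAble() before replacing
anything:

package main

import (
	"fmt"
	"regexp"
)

// Same patterns as in the program above.
var httpLink = regexp.MustCompile(`\bhttp://[^# \t\r\n]*(?:\w|/)`)
var xmlns = regexp.MustCompile(`\bxmlns\b`)

func main() {
	for _, line := range []string{
		"See http://example.com/docs. for details.",
		`xmlns="http://www.w3.org/1999/xhtml"`,
		"http://example.com/page#fragment",
	} {
		if xmlns.MatchString(line) {
			// processFile() leaves XML namespace lines alone.
			fmt.Printf("skipped (xmlns): %s\n", line)
			continue
		}

		// Trailing punctuation and "#fragment" parts are not part of the match.
		fmt.Printf("candidate link:  %s\n", httpLink.FindString(line))
	}
}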





@Maintainers Would any of you actually review it like this? If yes, is
the pseudo-code not enough?

Well, Kees already mentioned he would like to see it :-) As he said,
presenting the algorithm/semantic patch/etc. that was used is the
usual practice for bulk patches.

It is also useful to have it around so that it can be reused/reapplied
later on.
If you're going to reproduce one of my patches for review, check out torvalds/master, go back to the last commit before I submitted the patch, and then run the algorithm above.


I didn't log that link by link. Maybe it's because I also didn't follow plain
HTTP redirects while opening HTTPS links. Maybe it even matched, but was
added after I made the changes.
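
(For context: that is what the CheckRedirect hook in the program above
enforces -- the "secure" client refuses to follow any redirect away from
https://, so a link whose HTTPS variant answers with a redirect to plain HTTP
is simply kept on http://. A minimal standalone sketch of just that policy;
example.com is only a placeholder:)

package main

import (
	"errors"
	"fmt"
	"net/http"
	"time"
)

func main() {
	client := http.Client{
		Timeout: 10 * time.Second,
		// Same policy as the "secure" client above: abort on any
		// redirect whose target scheme is not https.
		CheckRedirect: func(req *http.Request, _ []*http.Request) error {
			if req.URL.Scheme != "https" && req.URL.Scheme != "" {
				return errors.New("insecure")
			}
			return nil
		},
	}

	// If this placeholder URL redirected to an http:// location, Get()
	// would return an error and httpsAble() would treat the link as not
	// convertible.
	resp, errGt := client.Get("https://example.com/")
	if errGt != nil {
		fmt.Println("kept on http://:", errGt)
		return
	}

	resp.Body.Close()
	fmt.Println("reachable via https:// without insecure redirects")
}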

It would be nice to have a list of links which cannot be converted,
since nowadays they are likely to be the exception rather than the
rule.
Don't worry, (by typing these words right now) I've noted this as a TODO of mine (for after this patch round).
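
For the record, a rough sketch of how that could be bolted onto the program
above (untested; recordFailure() and printFailures() are made-up names, and
the snippet relies on the program's existing imports):

// Remember every link whose HTTPS variant could not be verified.
var failedLinks = struct {
	sync.Mutex

	links map[string]struct{}
}{links: map[string]struct{}{}}

// Would be called from processLink(), right where httpsAble() returns false.
func recordFailure(link string) {
	failedLinks.Lock()
	failedLinks.links[link] = struct{}{}
	failedLinks.Unlock()
}

// Would be called from main() after wg.Wait(), once all workers are done.
func printFailures() {
	for link := range failedLinks.links {
		fmt.Fprintln(os.Stderr, "not converted: http://"+link)
	}
}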


* Linus *didn't even respond* (at least I didn't receive anything) to my
catch-them-all patch at all, not even with something like "please not as
a .gz attachment" or "please split by subsystem"

Please take into account that LKML volume is huge and Linus (like
everybody else) only reads a small subset.
I've addressed Linus directly (with LKML in CC) and explicitly asked whether he will have a look at this or not.

Further, as a general rule, Linus shouldn't be picking individual
patches to begin with because that skips the review tree.
Who, if not Linus, is supposed to review one huge patch spanning lots of subsystems?


Cheers,
Miguel