You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
127 lines
2.9 KiB
127 lines
2.9 KiB
package parsers
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"git.aiterp.net/rpdata/api/models"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// A ParsedLog contains the parsed log header and its posts.
|
|
type ParsedLog struct {
|
|
Log models.Log
|
|
Posts []*models.Post
|
|
}
|
|
|
|
// ForumLog parses the logs from the data.
|
|
func ForumLog(data string, tz *time.Location) ([]ParsedLog, error) {
|
|
metadata := ForumLogMetadata(data)
|
|
results := make([]ParsedLog, 0, len(metadata["Date"]))
|
|
scanner := bufio.NewScanner(strings.NewReader(data))
|
|
|
|
for i, dateStr := range metadata["Date"] {
|
|
// Parse date
|
|
date, err := time.ParseInLocation("January 2, 2006", dateStr, tz)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse date #%d: %#+v is not the in the correct format of \"January 2, 2006\"", i+1, dateStr)
|
|
}
|
|
|
|
// Parse posts
|
|
posts := make([]*models.Post, 0, 128)
|
|
parsing := false
|
|
prev := ""
|
|
prevPost := models.Post{}
|
|
for scanner.Scan() {
|
|
line := strings.Trim(scanner.Text(), "\r\t ")
|
|
|
|
// Skip lines with links to other logs using the --> and <-- notation.
|
|
if strings.HasPrefix(line, "-") || strings.HasPrefix(line, "<") {
|
|
prev = line
|
|
continue
|
|
}
|
|
|
|
// If parsing and reaching a double empty-line, the session is done.
|
|
if parsing && len(line) < 2 && len(prev) < 2 {
|
|
break
|
|
}
|
|
|
|
// Skip empty lines, but record them as thep revious for the above check.
|
|
if len(line) < 2 {
|
|
prev = line
|
|
continue
|
|
}
|
|
|
|
// If not parsing, skip until the first mirclike post.
|
|
if !parsing {
|
|
if strings.HasPrefix(line, "[") {
|
|
parsing = true
|
|
} else {
|
|
prev = line
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Parse the post.
|
|
post, err := MircPost(line, date, prevPost)
|
|
if err != nil {
|
|
summary := ""
|
|
for _, ru := range line {
|
|
summary += string(ru)
|
|
if len(summary) > 60 {
|
|
summary += "..."
|
|
break
|
|
}
|
|
}
|
|
|
|
return nil, fmt.Errorf("failed to parse post: %s", summary)
|
|
}
|
|
|
|
posts = append(posts, &post)
|
|
prevPost = post
|
|
prev = line
|
|
}
|
|
|
|
// No posts means there's a problem.
|
|
if len(posts) == 0 {
|
|
return nil, fmt.Errorf("session %d (%s) has no posts (too many dates?)", i+1, dateStr)
|
|
}
|
|
|
|
// Add it.
|
|
results = append(results, ParsedLog{
|
|
Log: models.Log{Date: posts[0].Time},
|
|
Posts: posts,
|
|
})
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// ForumLogMetadata parses the metadata that precedes the first post in data
// and returns it as a map from section key to that section's value lines.
//
// Every line is trimmed of surrounding '\r', '\t' and ' ' characters first.
// A section starts with a key line, of which only the text before the first
// ':' is kept (the whole line if it has no ':'). Each following non-empty
// line is appended to that key's values, and an empty line ends the section.
// Parsing stops at the first line starting with "[". A section without any
// value lines gets no entry in the map.
//
// The returned map is never nil.
func ForumLogMetadata(data string) map[string][]string {
	result := make(map[string][]string)
	key := ""

	scanner := bufio.NewScanner(strings.NewReader(data))
	// The default scanner stops at the first line longer than
	// bufio.MaxScanTokenSize, which would silently drop all metadata after
	// it. No line can be longer than the data itself, so raising the limit
	// to cover the whole input rules that out.
	if limit := len(data) + 1; limit > bufio.MaxScanTokenSize {
		scanner.Buffer(nil, limit)
	}

	for scanner.Scan() {
		line := strings.Trim(scanner.Text(), "\r\t ")

		// The first post marks the end of the metadata.
		if strings.HasPrefix(line, "[") {
			break
		}

		// An empty line closes the current section.
		if len(line) < 1 {
			key = ""
			continue
		}

		// Outside a section, this line names the next one. Anything after
		// the first ':' on the key line is discarded.
		if key == "" {
			key = line
			if end := strings.Index(line, ":"); end >= 0 {
				key = line[:end]
			}
			continue
		}

		result[key] = append(result[key], line)
	}

	return result
}
|