|
|
package parsers
import ( "bufio" "fmt" "git.aiterp.net/rpdata/api/models" "strings" "time" )
// A ParsedLog contains the parsed log header and its posts.
type ParsedLog struct { Log models.Log Posts []*models.Post }
// ForumLog parses the logs from the data.
func ForumLog(data string, tz *time.Location) ([]ParsedLog, error) { metadata := ForumLogMetadata(data) results := make([]ParsedLog, 0, len(metadata["Date"])) scanner := bufio.NewScanner(strings.NewReader(data))
for i, dateStr := range metadata["Date"] { // Parse date
date, err := time.ParseInLocation("January 2, 2006", dateStr, tz) if err != nil { return nil, fmt.Errorf("failed to parse date #%d: %#+v is not the in the correct format of \"January 2, 2006\"", i+1, dateStr) }
// Parse posts
posts := make([]*models.Post, 0, 128) parsing := false prev := "" prevPost := models.Post{} for scanner.Scan() { line := strings.Trim(scanner.Text(), "\r\t ")
// Skip lines with links to other logs using the --> and <-- notation.
if strings.HasPrefix(line, "-") || strings.HasPrefix(line, "<") { prev = line continue }
// If parsing and reaching a double empty-line, the session is done.
if parsing && len(line) < 2 && len(prev) < 2 { break }
// Skip empty lines, but record them as thep revious for the above check.
if len(line) < 2 { prev = line continue }
// If not parsing, skip until the first mirclike post.
if !parsing { if strings.HasPrefix(line, "[") { parsing = true } else { prev = line continue } }
// Parse the post.
post, err := MircPost(line, date, prevPost) if err != nil { summary := "" for _, ru := range line { summary += string(ru) if len(summary) > 60 { summary += "..." break } }
return nil, fmt.Errorf("failed to parse post: %s", summary) }
posts = append(posts, &post) prevPost = post prev = line }
// No posts means there's a problem.
if len(posts) == 0 { return nil, fmt.Errorf("session %d (%s) has no posts (too many dates?)", i+1, dateStr) }
// Add it.
results = append(results, ParsedLog{ Log: models.Log{Date: posts[0].Time}, Posts: posts, }) }
return results, nil }
// ForumLogMetadata parses the metadata header that precedes the first IRC
// post in data and returns it as a map from section name to the lines listed
// in that section.
//
// The header is read line by line, with surrounding spaces, tabs and carriage
// returns trimmed from each line:
//
//   - A line starting with "[" ends the header; nothing after it is read.
//   - An empty line closes the current section.
//   - The first line of a section names it: the text before the first ":" is
//     the key and the rest of that line is discarded.
//   - Every following line up to the next empty line is appended to the
//     values of that key.
//
// A section without any value lines produces no entry in the map. The
// returned map is never nil.
func ForumLogMetadata(data string) map[string][]string {
	result := make(map[string][]string)

	scanner := bufio.NewScanner(strings.NewReader(data))
	// The default limit of 64 KiB per line would make Scan stop early on a
	// long line and silently drop the rest of the header. No line can be
	// longer than the input itself, so use that as the limit instead.
	scanner.Buffer(make([]byte, 0, 4096), len(data)+1)

	key := ""
	for scanner.Scan() {
		line := strings.Trim(scanner.Text(), "\r\t ")

		if strings.HasPrefix(line, "[") {
			break
		}

		switch {
		case line == "":
			// A blank line ends the current section.
			key = ""
		case key == "":
			// First line of a section: keep only what precedes the colon.
			key = line
			if i := strings.IndexByte(line, ':'); i >= 0 {
				key = line[:i]
			}
		default:
			result[key] = append(result[key], line)
		}
	}

	return result
}
|